From 6bbb82c1cb2ef37dc0ff5d13a0e05148672ed802 Mon Sep 17 00:00:00 2001 From: Anas Dorbani Date: Tue, 2 Dec 2025 11:40:54 -0500 Subject: [PATCH 01/59] added flock metrics to all the providers --- duckdb | 2 +- extension-ci-tools | 2 +- src/CMakeLists.txt | 1 + .../llm_first_or_last/implementation.cpp | 11 +++ .../aggregate/llm_reduce/implementation.cpp | 11 +++ .../aggregate/llm_rerank/implementation.cpp | 11 +++ .../scalar/llm_complete/implementation.cpp | 10 +++ .../scalar/llm_embedding/implementation.cpp | 11 +++ .../scalar/llm_filter/implementation.cpp | 11 +++ src/include/flock/metrics/metrics.hpp | 77 +++++++++++++++++++ .../providers/handlers/azure.hpp | 16 +++- .../providers/handlers/base_handler.hpp | 30 +++++++- .../providers/handlers/ollama.hpp | 13 +++- .../providers/handlers/openai.hpp | 16 +++- src/include/flock/registry/scalar.hpp | 2 + src/metrics/CMakeLists.txt | 4 + src/metrics/implementation.cpp | 15 ++++ src/metrics/registry.cpp | 22 ++++++ 18 files changed, 256 insertions(+), 9 deletions(-) create mode 100644 src/include/flock/metrics/metrics.hpp create mode 100644 src/metrics/CMakeLists.txt create mode 100644 src/metrics/implementation.cpp create mode 100644 src/metrics/registry.cpp diff --git a/duckdb b/duckdb index b8a06e4a..68d7555f 160000 --- a/duckdb +++ b/duckdb @@ -1 +1 @@ -Subproject commit b8a06e4a22672e254cd0baa68a3dbed2eb51c56e +Subproject commit 68d7555f68bd25c1a251ccca2e6338949c33986a diff --git a/extension-ci-tools b/extension-ci-tools index ee7f51d0..aac96406 160000 --- a/extension-ci-tools +++ b/extension-ci-tools @@ -1 +1 @@ -Subproject commit ee7f51d06562bbea87d6f6f921def85557e44d18 +Subproject commit aac9640615e51d6e7e8b72d4bf023703cfd8e479 diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 47843aba..2d00f9f4 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -5,6 +5,7 @@ add_subdirectory(model_manager) add_subdirectory(prompt_manager) add_subdirectory(custom_parser) add_subdirectory(secret_manager) 
+add_subdirectory(metrics) set(EXTENSION_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/flock_extension.cpp ${EXTENSION_SOURCES} diff --git a/src/functions/aggregate/llm_first_or_last/implementation.cpp b/src/functions/aggregate/llm_first_or_last/implementation.cpp index 7135359c..485c0069 100644 --- a/src/functions/aggregate/llm_first_or_last/implementation.cpp +++ b/src/functions/aggregate/llm_first_or_last/implementation.cpp @@ -1,4 +1,7 @@ #include "flock/functions/aggregate/llm_first_or_last.hpp" +#include "flock/metrics/metrics.hpp" + +#include namespace flock { @@ -75,6 +78,9 @@ nlohmann::json LlmFirstOrLast::Evaluate(nlohmann::json& tuples) { void LlmFirstOrLast::FinalizeResults(duckdb::Vector& states, duckdb::AggregateInputData& aggr_input_data, duckdb::Vector& result, idx_t count, idx_t offset, AggregateFunctionType function_type) { + // Start execution timing + auto exec_start = std::chrono::high_resolution_clock::now(); + const auto states_vector = reinterpret_cast(duckdb::FlatVector::GetData(states)); for (idx_t i = 0; i < count; i++) { @@ -99,6 +105,11 @@ void LlmFirstOrLast::FinalizeResults(duckdb::Vector& states, duckdb::AggregateIn result.SetValue(idx, nullptr);// Empty JSON object for null/empty states } } + + // End execution timing and update metrics + auto exec_end = std::chrono::high_resolution_clock::now(); + double exec_duration_ms = std::chrono::duration(exec_end - exec_start).count(); + FlockMetrics::GetInstance().AddExecutionTime(exec_duration_ms); } }// namespace flock diff --git a/src/functions/aggregate/llm_reduce/implementation.cpp b/src/functions/aggregate/llm_reduce/implementation.cpp index 9a7ce46d..44ba02ab 100644 --- a/src/functions/aggregate/llm_reduce/implementation.cpp +++ b/src/functions/aggregate/llm_reduce/implementation.cpp @@ -1,4 +1,7 @@ #include "flock/functions/aggregate/llm_reduce.hpp" +#include "flock/metrics/metrics.hpp" + +#include namespace flock { @@ -64,6 +67,9 @@ nlohmann::json LlmReduce::ReduceLoop(const nlohmann::json& 
tuples, void LlmReduce::FinalizeResults(duckdb::Vector& states, duckdb::AggregateInputData& aggr_input_data, duckdb::Vector& result, idx_t count, idx_t offset, const AggregateFunctionType function_type) { + // Start execution timing + auto exec_start = std::chrono::high_resolution_clock::now(); + const auto states_vector = reinterpret_cast(duckdb::FlatVector::GetData(states)); for (idx_t i = 0; i < count; i++) { @@ -83,6 +89,11 @@ void LlmReduce::FinalizeResults(duckdb::Vector& states, duckdb::AggregateInputDa result.SetValue(idx, nullptr);// Empty result for null/empty states } } + + // End execution timing and update metrics + auto exec_end = std::chrono::high_resolution_clock::now(); + double exec_duration_ms = std::chrono::duration(exec_end - exec_start).count(); + FlockMetrics::GetInstance().AddExecutionTime(exec_duration_ms); } }// namespace flock diff --git a/src/functions/aggregate/llm_rerank/implementation.cpp b/src/functions/aggregate/llm_rerank/implementation.cpp index a5fbd789..c43b17f1 100644 --- a/src/functions/aggregate/llm_rerank/implementation.cpp +++ b/src/functions/aggregate/llm_rerank/implementation.cpp @@ -1,4 +1,7 @@ #include "flock/functions/aggregate/llm_rerank.hpp" +#include "flock/metrics/metrics.hpp" + +#include namespace flock { @@ -114,6 +117,9 @@ nlohmann::json LlmRerank::SlidingWindow(nlohmann::json& tuples) { void LlmRerank::Finalize(duckdb::Vector& states, duckdb::AggregateInputData& aggr_input_data, duckdb::Vector& result, idx_t count, idx_t offset) { + // Start execution timing + auto exec_start = std::chrono::high_resolution_clock::now(); + const auto states_vector = reinterpret_cast(duckdb::FlatVector::GetData(states)); for (idx_t i = 0; i < count; i++) { @@ -132,6 +138,11 @@ void LlmRerank::Finalize(duckdb::Vector& states, duckdb::AggregateInputData& agg result.SetValue(idx, nullptr);// Empty result for null/empty states } } + + // End execution timing and update metrics + auto exec_end = 
std::chrono::high_resolution_clock::now(); + double exec_duration_ms = std::chrono::duration(exec_end - exec_start).count(); + FlockMetrics::GetInstance().AddExecutionTime(exec_duration_ms); } }// namespace flock diff --git a/src/functions/scalar/llm_complete/implementation.cpp b/src/functions/scalar/llm_complete/implementation.cpp index f4ae1509..1c354a32 100644 --- a/src/functions/scalar/llm_complete/implementation.cpp +++ b/src/functions/scalar/llm_complete/implementation.cpp @@ -1,4 +1,7 @@ #include "flock/functions/scalar/llm_complete.hpp" +#include "flock/metrics/metrics.hpp" + +#include namespace flock { @@ -63,6 +66,8 @@ std::vector LlmComplete::Operation(duckdb::DataChunk& args) { } void LlmComplete::Execute(duckdb::DataChunk& args, duckdb::ExpressionState& state, duckdb::Vector& result) { + // Start execution timing + auto exec_start = std::chrono::high_resolution_clock::now(); if (const auto results = LlmComplete::Operation(args); static_cast(results.size()) == 1) { auto empty_vec = duckdb::Vector(std::string()); @@ -75,6 +80,11 @@ void LlmComplete::Execute(duckdb::DataChunk& args, duckdb::ExpressionState& stat result.SetValue(index++, duckdb::Value(res)); } } + + // End execution timing and update metrics + auto exec_end = std::chrono::high_resolution_clock::now(); + double exec_duration_ms = std::chrono::duration(exec_end - exec_start).count(); + FlockMetrics::GetInstance().AddExecutionTime(exec_duration_ms); } }// namespace flock diff --git a/src/functions/scalar/llm_embedding/implementation.cpp b/src/functions/scalar/llm_embedding/implementation.cpp index 7b423257..bd4eebdd 100644 --- a/src/functions/scalar/llm_embedding/implementation.cpp +++ b/src/functions/scalar/llm_embedding/implementation.cpp @@ -1,4 +1,7 @@ #include "flock/functions/scalar/llm_embedding.hpp" +#include "flock/metrics/metrics.hpp" + +#include namespace flock { @@ -71,12 +74,20 @@ std::vector> LlmEmbedding::Operation(duckdb::DataC } void LlmEmbedding::Execute(duckdb::DataChunk& 
args, duckdb::ExpressionState& state, duckdb::Vector& result) { + // Start execution timing + auto exec_start = std::chrono::high_resolution_clock::now(); + auto results = LlmEmbedding::Operation(args); auto index = 0; for (const auto& res: results) { result.SetValue(index++, duckdb::Value::LIST(res)); } + + // End execution timing and update metrics + auto exec_end = std::chrono::high_resolution_clock::now(); + double exec_duration_ms = std::chrono::duration(exec_end - exec_start).count(); + FlockMetrics::GetInstance().AddExecutionTime(exec_duration_ms); } }// namespace flock diff --git a/src/functions/scalar/llm_filter/implementation.cpp b/src/functions/scalar/llm_filter/implementation.cpp index 67b49108..fd61b1d2 100644 --- a/src/functions/scalar/llm_filter/implementation.cpp +++ b/src/functions/scalar/llm_filter/implementation.cpp @@ -1,4 +1,7 @@ #include "flock/functions/scalar/llm_filter.hpp" +#include "flock/metrics/metrics.hpp" + +#include namespace flock { @@ -48,12 +51,20 @@ std::vector LlmFilter::Operation(duckdb::DataChunk& args) { } void LlmFilter::Execute(duckdb::DataChunk& args, duckdb::ExpressionState& state, duckdb::Vector& result) { + // Start execution timing + auto exec_start = std::chrono::high_resolution_clock::now(); + const auto results = LlmFilter::Operation(args); auto index = 0; for (const auto& res: results) { result.SetValue(index++, duckdb::Value(res)); } + + // End execution timing and update metrics + auto exec_end = std::chrono::high_resolution_clock::now(); + double exec_duration_ms = std::chrono::duration(exec_end - exec_start).count(); + FlockMetrics::GetInstance().AddExecutionTime(exec_duration_ms); } }// namespace flock diff --git a/src/include/flock/metrics/metrics.hpp b/src/include/flock/metrics/metrics.hpp new file mode 100644 index 00000000..90896a3a --- /dev/null +++ b/src/include/flock/metrics/metrics.hpp @@ -0,0 +1,77 @@ +#pragma once + +#include "duckdb/function/scalar_function.hpp" +#include +#include +#include + 
+namespace flock { + +class FlockMetrics { +public: + static FlockMetrics& GetInstance() { + static FlockMetrics instance; + return instance; + } + + FlockMetrics(const FlockMetrics&) = delete; + FlockMetrics& operator=(const FlockMetrics&) = delete; + + void UpdateTokenUsage(int64_t input_tokens, int64_t output_tokens) { + std::lock_guard lock(mutex_); + total_input_tokens_ += input_tokens; + total_output_tokens_ += output_tokens; + } + + void IncrementApiCalls() { + std::lock_guard lock(mutex_); + total_api_calls_++; + } + + void AddApiDuration(double duration_ms) { + std::lock_guard lock(mutex_); + total_api_duration_ms_ += duration_ms; + } + + void AddExecutionTime(double execution_time_ms) { + std::lock_guard lock(mutex_); + total_execution_time_ms_ += execution_time_ms; + } + + nlohmann::json GetMetrics() const { + std::lock_guard lock(mutex_); + return { + {"total_input_tokens", total_input_tokens_}, + {"total_output_tokens", total_output_tokens_}, + {"total_tokens", total_input_tokens_ + total_output_tokens_}, + {"total_api_calls", total_api_calls_}, + {"total_api_duration_ms", total_api_duration_ms_}, + {"total_execution_time_ms", total_execution_time_ms_}, + {"avg_api_duration_ms", total_api_calls_ > 0 ? total_api_duration_ms_ / total_api_calls_ : 0.0}, + {"avg_execution_time_ms", total_api_calls_ > 0 ? 
total_execution_time_ms_ / total_api_calls_ : 0.0}}; + } + + void Reset() { + std::lock_guard lock(mutex_); + total_input_tokens_ = 0; + total_output_tokens_ = 0; + total_api_calls_ = 0; + total_api_duration_ms_ = 0.0; + total_execution_time_ms_ = 0.0; + } + + static void ExecuteGetMetrics(duckdb::DataChunk& args, duckdb::ExpressionState& state, duckdb::Vector& result); + static void ExecuteResetMetrics(duckdb::DataChunk& args, duckdb::ExpressionState& state, duckdb::Vector& result); + +private: + FlockMetrics() = default; + + mutable std::mutex mutex_; + int64_t total_input_tokens_ = 0; + int64_t total_output_tokens_ = 0; + int64_t total_api_calls_ = 0; + double total_api_duration_ms_ = 0.0; + double total_execution_time_ms_ = 0.0; +}; + +}// namespace flock diff --git a/src/include/flock/model_manager/providers/handlers/azure.hpp b/src/include/flock/model_manager/providers/handlers/azure.hpp index 871cc758..26d8ac41 100644 --- a/src/include/flock/model_manager/providers/handlers/azure.hpp +++ b/src/include/flock/model_manager/providers/handlers/azure.hpp @@ -30,7 +30,6 @@ class AzureModelManager : public BaseModelProviderHandler { } } } else { - // Embedding-specific checks (if any) can be added here if (response.contains("data") && response["data"].is_array() && response["data"].empty()) { throw std::runtime_error("Azure API returned empty embedding data."); } @@ -66,6 +65,21 @@ class AzureModelManager : public BaseModelProviderHandler { return {}; } + std::pair ExtractTokenUsage(const nlohmann::json& response) const override { + int64_t input_tokens = 0; + int64_t output_tokens = 0; + if (response.contains("usage") && response["usage"].is_object()) { + const auto& usage = response["usage"]; + if (usage.contains("prompt_tokens") && usage["prompt_tokens"].is_number()) { + input_tokens = usage["prompt_tokens"].get(); + } + if (usage.contains("completion_tokens") && usage["completion_tokens"].is_number()) { + output_tokens = usage["completion_tokens"].get(); + } + 
} + return {input_tokens, output_tokens}; + } + std::string _token; std::string _resource_name; std::string _deployment_model_name; diff --git a/src/include/flock/model_manager/providers/handlers/base_handler.hpp b/src/include/flock/model_manager/providers/handlers/base_handler.hpp index 998c9cf7..9722fed2 100644 --- a/src/include/flock/model_manager/providers/handlers/base_handler.hpp +++ b/src/include/flock/model_manager/providers/handlers/base_handler.hpp @@ -1,7 +1,9 @@ #pragma once +#include "flock/metrics/metrics.hpp" #include "flock/model_manager/providers/handlers/handler.hpp" #include "session.hpp" +#include #include #include #include @@ -16,12 +18,10 @@ class BaseModelProviderHandler : public IModelProviderHandler { : _throw_exception(throw_exception) {} virtual ~BaseModelProviderHandler() = default; - // AddRequest: just add the json to the batch (type is ignored, kept for interface compatibility) void AddRequest(const nlohmann::json& json, RequestType type = RequestType::Completion) { _request_batch.push_back(json); } - // CollectCompletions: process all as completions, then clear std::vector CollectCompletions(const std::string& contentType = "application/json") { std::vector completions; if (!_request_batch.empty()) completions = ExecuteBatch(_request_batch, true, contentType, true); @@ -29,7 +29,6 @@ class BaseModelProviderHandler : public IModelProviderHandler { return completions; } - // CollectEmbeddings: process all as embeddings, then clear std::vector CollectEmbeddings(const std::string& contentType = "application/json") { std::vector embeddings; if (!_request_batch.empty()) embeddings = ExecuteBatch(_request_batch, true, contentType, false); @@ -37,7 +36,6 @@ class BaseModelProviderHandler : public IModelProviderHandler { return embeddings; } - // Unified batch implementation with customizable headers std::vector ExecuteBatch(const std::vector& jsons, bool async = true, const std::string& contentType = "application/json", bool is_completion = 
true) { struct CurlRequestData { std::string response; @@ -66,6 +64,9 @@ class BaseModelProviderHandler : public IModelProviderHandler { curl_easy_setopt(requests[i].easy, CURLOPT_POSTFIELDS, requests[i].payload.c_str()); curl_multi_add_handle(multi_handle, requests[i].easy); } + + auto api_start = std::chrono::high_resolution_clock::now(); + int still_running = 0; curl_multi_perform(multi_handle, &still_running); while (still_running) { @@ -73,6 +74,13 @@ class BaseModelProviderHandler : public IModelProviderHandler { curl_multi_wait(multi_handle, NULL, 0, 1000, &numfds); curl_multi_perform(multi_handle, &still_running); } + + auto api_end = std::chrono::high_resolution_clock::now(); + double api_duration_ms = std::chrono::duration(api_end - api_start).count(); + + int64_t batch_input_tokens = 0; + int64_t batch_output_tokens = 0; + std::vector results(jsons.size()); for (size_t i = 0; i < requests.size(); ++i) { curl_easy_getinfo(requests[i].easy, CURLINFO_RESPONSE_CODE, NULL); @@ -80,6 +88,11 @@ class BaseModelProviderHandler : public IModelProviderHandler { try { nlohmann::json parsed = nlohmann::json::parse(requests[i].response); checkResponse(parsed, is_completion); + + auto [input_tokens, output_tokens] = ExtractTokenUsage(parsed); + batch_input_tokens += input_tokens; + batch_output_tokens += output_tokens; + if (is_completion) { results[i] = ExtractCompletionOutput(parsed); } else { @@ -94,6 +107,14 @@ class BaseModelProviderHandler : public IModelProviderHandler { curl_multi_remove_handle(multi_handle, requests[i].easy); curl_easy_cleanup(requests[i].easy); } + + auto& metrics = FlockMetrics::GetInstance(); + metrics.UpdateTokenUsage(batch_input_tokens, batch_output_tokens); + metrics.AddApiDuration(api_duration_ms); + for (size_t i = 0; i < jsons.size(); ++i) { + metrics.IncrementApiCalls(); + } + curl_slist_free_all(headers); curl_multi_cleanup(multi_handle); return results; @@ -113,6 +134,7 @@ class BaseModelProviderHandler : public 
IModelProviderHandler { virtual void checkProviderSpecificResponse(const nlohmann::json&, bool is_completion) {} virtual nlohmann::json ExtractCompletionOutput(const nlohmann::json&) const { return {}; } virtual nlohmann::json ExtractEmbeddingVector(const nlohmann::json&) const { return {}; } + virtual std::pair ExtractTokenUsage(const nlohmann::json& response) const = 0; void trigger_error(const std::string& msg) { if (_throw_exception) { diff --git a/src/include/flock/model_manager/providers/handlers/ollama.hpp b/src/include/flock/model_manager/providers/handlers/ollama.hpp index 8bf43686..165abfc3 100644 --- a/src/include/flock/model_manager/providers/handlers/ollama.hpp +++ b/src/include/flock/model_manager/providers/handlers/ollama.hpp @@ -41,7 +41,6 @@ class OllamaModelManager : public BaseModelProviderHandler { throw std::runtime_error("The request was refused due to some internal error with Ollama API"); } } else { - // Embedding-specific checks (if any) can be added here if (response.contains("embeddings") && (!response["embeddings"].is_array() || response["embeddings"].empty())) { throw std::runtime_error("Ollama API returned empty or invalid embedding data."); } @@ -62,6 +61,18 @@ class OllamaModelManager : public BaseModelProviderHandler { return {}; } + std::pair ExtractTokenUsage(const nlohmann::json& response) const override { + int64_t input_tokens = 0; + int64_t output_tokens = 0; + if (response.contains("prompt_eval_count") && response["prompt_eval_count"].is_number()) { + input_tokens = response["prompt_eval_count"].get(); + } + if (response.contains("eval_count") && response["eval_count"].is_number()) { + output_tokens = response["eval_count"].get(); + } + return {input_tokens, output_tokens}; + } + Session _session; std::string _url; }; diff --git a/src/include/flock/model_manager/providers/handlers/openai.hpp b/src/include/flock/model_manager/providers/handlers/openai.hpp index 83c9625f..86ee191f 100644 --- 
a/src/include/flock/model_manager/providers/handlers/openai.hpp +++ b/src/include/flock/model_manager/providers/handlers/openai.hpp @@ -65,7 +65,6 @@ class OpenAIModelManager : public BaseModelProviderHandler { } } } else { - // Embedding-specific checks (if any) can be added here if (response.contains("data") && response["data"].is_array() && response["data"].empty()) { throw std::runtime_error("OpenAI API returned empty embedding data."); } @@ -91,6 +90,21 @@ class OpenAIModelManager : public BaseModelProviderHandler { return results; } } + + std::pair ExtractTokenUsage(const nlohmann::json& response) const override { + int64_t input_tokens = 0; + int64_t output_tokens = 0; + if (response.contains("usage") && response["usage"].is_object()) { + const auto& usage = response["usage"]; + if (usage.contains("prompt_tokens") && usage["prompt_tokens"].is_number()) { + input_tokens = usage["prompt_tokens"].get(); + } + if (usage.contains("completion_tokens") && usage["completion_tokens"].is_number()) { + output_tokens = usage["completion_tokens"].get(); + } + } + return {input_tokens, output_tokens}; + } }; }// namespace flock diff --git a/src/include/flock/registry/scalar.hpp b/src/include/flock/registry/scalar.hpp index ffdb0309..2518b7d1 100644 --- a/src/include/flock/registry/scalar.hpp +++ b/src/include/flock/registry/scalar.hpp @@ -17,6 +17,8 @@ class ScalarRegistry { static void RegisterFusionCombMED(duckdb::ExtensionLoader& loader); static void RegisterFusionCombMNZ(duckdb::ExtensionLoader& loader); static void RegisterFusionCombSUM(duckdb::ExtensionLoader& loader); + static void RegisterFlockGetMetrics(duckdb::ExtensionLoader& loader); + static void RegisterFlockResetMetrics(duckdb::ExtensionLoader& loader); }; }// namespace flock diff --git a/src/metrics/CMakeLists.txt b/src/metrics/CMakeLists.txt new file mode 100644 index 00000000..4c3059d1 --- /dev/null +++ b/src/metrics/CMakeLists.txt @@ -0,0 +1,4 @@ +set(EXTENSION_SOURCES + ${EXTENSION_SOURCES} 
${CMAKE_CURRENT_SOURCE_DIR}/implementation.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/registry.cpp + PARENT_SCOPE) diff --git a/src/metrics/implementation.cpp b/src/metrics/implementation.cpp new file mode 100644 index 00000000..3642dc57 --- /dev/null +++ b/src/metrics/implementation.cpp @@ -0,0 +1,15 @@ +#include "flock/metrics/metrics.hpp" + +namespace flock { + +void FlockMetrics::ExecuteGetMetrics(duckdb::DataChunk& args, duckdb::ExpressionState& state, duckdb::Vector& result) { + auto json_str = GetInstance().GetMetrics().dump(); + result.SetValue(0, duckdb::Value(json_str)); +} + +void FlockMetrics::ExecuteResetMetrics(duckdb::DataChunk& args, duckdb::ExpressionState& state, duckdb::Vector& result) { + GetInstance().Reset(); + result.SetValue(0, duckdb::Value("Metrics reset successfully")); +} + +}// namespace flock diff --git a/src/metrics/registry.cpp b/src/metrics/registry.cpp new file mode 100644 index 00000000..42d89908 --- /dev/null +++ b/src/metrics/registry.cpp @@ -0,0 +1,22 @@ +#include "flock/registry/registry.hpp" +#include "flock/metrics/metrics.hpp" + +namespace flock { + +void ScalarRegistry::RegisterFlockGetMetrics(duckdb::ExtensionLoader& loader) { + loader.RegisterFunction(duckdb::ScalarFunction( + "flock_get_metrics", + {}, + duckdb::LogicalType::JSON(), + FlockMetrics::ExecuteGetMetrics)); +} + +void ScalarRegistry::RegisterFlockResetMetrics(duckdb::ExtensionLoader& loader) { + loader.RegisterFunction(duckdb::ScalarFunction( + "flock_reset_metrics", + {}, + duckdb::LogicalType::VARCHAR, + FlockMetrics::ExecuteResetMetrics)); +} + +}// namespace flock From fa553a166c4eb1156fafd8ba3ba099426176a364 Mon Sep 17 00:00:00 2001 From: Anas Dorbani Date: Tue, 2 Dec 2025 11:46:36 -0500 Subject: [PATCH 02/59] upgrade gh action to DuckDB 1.4.2 --- .github/workflows/MainDistributionPipeline.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/MainDistributionPipeline.yml b/.github/workflows/MainDistributionPipeline.yml 
index 8b7380e0..899d456b 100644 --- a/.github/workflows/MainDistributionPipeline.yml +++ b/.github/workflows/MainDistributionPipeline.yml @@ -26,16 +26,16 @@ jobs: name: Build extension binaries uses: duckdb/extension-ci-tools/.github/workflows/_extension_distribution.yml@main with: - duckdb_version: v1.4.0 + duckdb_version: v1.4.2 extension_name: flock ci_tools_version: main exclude_archs: 'wasm_mvp;wasm_threads;wasm_eh' duckdb-stable-build: name: Build extension binaries - uses: duckdb/extension-ci-tools/.github/workflows/_extension_distribution.yml@v1.4.0 + uses: duckdb/extension-ci-tools/.github/workflows/_extension_distribution.yml@v1.4.2 with: - duckdb_version: v1.4.0 - ci_tools_version: v1.4.0 + duckdb_version: v1.4.2 + ci_tools_version: v1.4.2 extension_name: flock exclude_archs: 'wasm_mvp;wasm_threads;wasm_eh' From e0d4c1f7f056cd7c2a9f75592b803c5a26bd54e5 Mon Sep 17 00:00:00 2001 From: Anas Dorbani Date: Tue, 2 Dec 2025 13:02:27 -0500 Subject: [PATCH 03/59] registered the metrics scalar functions --- src/registry/scalar.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/registry/scalar.cpp b/src/registry/scalar.cpp index 87706f3f..0af1a009 100644 --- a/src/registry/scalar.cpp +++ b/src/registry/scalar.cpp @@ -11,6 +11,8 @@ void ScalarRegistry::Register(duckdb::ExtensionLoader& loader) { RegisterFusionCombMED(loader); RegisterFusionCombMNZ(loader); RegisterFusionCombSUM(loader); + RegisterFlockGetMetrics(loader); + RegisterFlockResetMetrics(loader); } }// namespace flock From c79dfcd7b3be97a9ca629ca357f90100a6d65232 Mon Sep 17 00:00:00 2001 From: Anas Dorbani Date: Tue, 2 Dec 2025 13:03:05 -0500 Subject: [PATCH 04/59] added unit tests for the metrics feature --- test/unit/functions/scalar/metrics_test.cpp | 146 ++++++++++++++++++++ 1 file changed, 146 insertions(+) create mode 100644 test/unit/functions/scalar/metrics_test.cpp diff --git a/test/unit/functions/scalar/metrics_test.cpp b/test/unit/functions/scalar/metrics_test.cpp new file mode 100644 
index 00000000..4134bf03 --- /dev/null +++ b/test/unit/functions/scalar/metrics_test.cpp @@ -0,0 +1,146 @@ +#include "flock/core/config.hpp" +#include "flock/metrics/metrics.hpp" +#include + +namespace flock { + +class MetricsTest : public ::testing::Test { +protected: + void SetUp() override { + FlockMetrics::GetInstance().Reset(); + } + + void TearDown() override { + FlockMetrics::GetInstance().Reset(); + } +}; + +TEST_F(MetricsTest, InitialMetricsAreZero) { + auto metrics = FlockMetrics::GetInstance().GetMetrics(); + + EXPECT_EQ(metrics["total_input_tokens"].get(), 0); + EXPECT_EQ(metrics["total_output_tokens"].get(), 0); + EXPECT_EQ(metrics["total_tokens"].get(), 0); + EXPECT_EQ(metrics["total_api_calls"].get(), 0); + EXPECT_DOUBLE_EQ(metrics["total_api_duration_ms"].get(), 0.0); + EXPECT_DOUBLE_EQ(metrics["total_execution_time_ms"].get(), 0.0); +} + +TEST_F(MetricsTest, UpdateTokenUsage) { + FlockMetrics::GetInstance().UpdateTokenUsage(100, 50); + + auto metrics = FlockMetrics::GetInstance().GetMetrics(); + EXPECT_EQ(metrics["total_input_tokens"].get(), 100); + EXPECT_EQ(metrics["total_output_tokens"].get(), 50); + EXPECT_EQ(metrics["total_tokens"].get(), 150); +} + +TEST_F(MetricsTest, IncrementApiCalls) { + FlockMetrics::GetInstance().IncrementApiCalls(); + FlockMetrics::GetInstance().IncrementApiCalls(); + FlockMetrics::GetInstance().IncrementApiCalls(); + + auto metrics = FlockMetrics::GetInstance().GetMetrics(); + EXPECT_EQ(metrics["total_api_calls"].get(), 3); +} + +TEST_F(MetricsTest, AddApiDuration) { + FlockMetrics::GetInstance().AddApiDuration(100.5); + FlockMetrics::GetInstance().AddApiDuration(200.25); + + auto metrics = FlockMetrics::GetInstance().GetMetrics(); + EXPECT_DOUBLE_EQ(metrics["total_api_duration_ms"].get(), 300.75); +} + +TEST_F(MetricsTest, AddExecutionTime) { + FlockMetrics::GetInstance().AddExecutionTime(150.0); + FlockMetrics::GetInstance().AddExecutionTime(250.0); + + auto metrics = FlockMetrics::GetInstance().GetMetrics(); + 
EXPECT_DOUBLE_EQ(metrics["total_execution_time_ms"].get(), 400.0); +} + +TEST_F(MetricsTest, AveragesCalculatedCorrectly) { + FlockMetrics::GetInstance().IncrementApiCalls(); + FlockMetrics::GetInstance().IncrementApiCalls(); + FlockMetrics::GetInstance().AddApiDuration(100.0); + FlockMetrics::GetInstance().AddApiDuration(200.0); + FlockMetrics::GetInstance().AddExecutionTime(150.0); + FlockMetrics::GetInstance().AddExecutionTime(250.0); + + auto metrics = FlockMetrics::GetInstance().GetMetrics(); + EXPECT_DOUBLE_EQ(metrics["avg_api_duration_ms"].get(), 150.0); + EXPECT_DOUBLE_EQ(metrics["avg_execution_time_ms"].get(), 200.0); +} + +TEST_F(MetricsTest, AveragesZeroWhenNoApiCalls) { + FlockMetrics::GetInstance().AddApiDuration(100.0); + FlockMetrics::GetInstance().AddExecutionTime(150.0); + + auto metrics = FlockMetrics::GetInstance().GetMetrics(); + EXPECT_DOUBLE_EQ(metrics["avg_api_duration_ms"].get(), 0.0); + EXPECT_DOUBLE_EQ(metrics["avg_execution_time_ms"].get(), 0.0); +} + +TEST_F(MetricsTest, ResetClearsAllMetrics) { + FlockMetrics::GetInstance().UpdateTokenUsage(100, 50); + FlockMetrics::GetInstance().IncrementApiCalls(); + FlockMetrics::GetInstance().AddApiDuration(100.0); + FlockMetrics::GetInstance().AddExecutionTime(150.0); + + FlockMetrics::GetInstance().Reset(); + + auto metrics = FlockMetrics::GetInstance().GetMetrics(); + EXPECT_EQ(metrics["total_input_tokens"].get(), 0); + EXPECT_EQ(metrics["total_output_tokens"].get(), 0); + EXPECT_EQ(metrics["total_api_calls"].get(), 0); + EXPECT_DOUBLE_EQ(metrics["total_api_duration_ms"].get(), 0.0); + EXPECT_DOUBLE_EQ(metrics["total_execution_time_ms"].get(), 0.0); +} + +TEST_F(MetricsTest, AccumulatesMultipleUpdates) { + FlockMetrics::GetInstance().UpdateTokenUsage(100, 50); + FlockMetrics::GetInstance().UpdateTokenUsage(200, 100); + FlockMetrics::GetInstance().UpdateTokenUsage(50, 25); + + auto metrics = FlockMetrics::GetInstance().GetMetrics(); + EXPECT_EQ(metrics["total_input_tokens"].get(), 350); + 
EXPECT_EQ(metrics["total_output_tokens"].get(), 175); + EXPECT_EQ(metrics["total_tokens"].get(), 525); +} + +TEST_F(MetricsTest, SqlFunctionFlockGetMetrics) { + FlockMetrics::GetInstance().UpdateTokenUsage(100, 50); + FlockMetrics::GetInstance().IncrementApiCalls(); + + auto con = Config::GetConnection(); + auto results = con.Query("SELECT flock_get_metrics() AS metrics;"); + + ASSERT_FALSE(results->HasError()) << results->GetError(); + ASSERT_EQ(results->RowCount(), 1); + + auto json_str = results->GetValue(0, 0).GetValue(); + auto metrics = nlohmann::json::parse(json_str); + + EXPECT_EQ(metrics["total_input_tokens"].get(), 100); + EXPECT_EQ(metrics["total_output_tokens"].get(), 50); + EXPECT_EQ(metrics["total_api_calls"].get(), 1); +} + +TEST_F(MetricsTest, SqlFunctionFlockResetMetrics) { + FlockMetrics::GetInstance().UpdateTokenUsage(100, 50); + FlockMetrics::GetInstance().IncrementApiCalls(); + + auto con = Config::GetConnection(); + auto results = con.Query("SELECT flock_reset_metrics() AS result;"); + + ASSERT_FALSE(results->HasError()) << results->GetError(); + ASSERT_EQ(results->RowCount(), 1); + + auto metrics = FlockMetrics::GetInstance().GetMetrics(); + EXPECT_EQ(metrics["total_input_tokens"].get(), 0); + EXPECT_EQ(metrics["total_output_tokens"].get(), 0); + EXPECT_EQ(metrics["total_api_calls"].get(), 0); +} + +}// namespace flock From 5abddab555cbd815cb5ef26d3ccb5d48f77ad395 Mon Sep 17 00:00:00 2001 From: Anas Dorbani Date: Tue, 2 Dec 2025 13:03:31 -0500 Subject: [PATCH 05/59] added integration tests for the metrics feature --- src/include/flock/metrics/base_manager.hpp | 298 ++++++++++++++++++ src/include/flock/metrics/context.hpp | 45 +++ src/include/flock/metrics/data_structures.hpp | 97 ++++++ src/include/flock/metrics/manager.hpp | 107 +++++++ src/include/flock/metrics/types.hpp | 44 +++ src/metrics/metrics.cpp | 50 +++ .../tests/functions/scalar/test_metrics.py | 91 ++++++ 7 files changed, 732 insertions(+) create mode 100644 
src/include/flock/metrics/base_manager.hpp create mode 100644 src/include/flock/metrics/context.hpp create mode 100644 src/include/flock/metrics/data_structures.hpp create mode 100644 src/include/flock/metrics/manager.hpp create mode 100644 src/include/flock/metrics/types.hpp create mode 100644 src/metrics/metrics.cpp create mode 100644 test/integration/src/integration/tests/functions/scalar/test_metrics.py diff --git a/src/include/flock/metrics/base_manager.hpp b/src/include/flock/metrics/base_manager.hpp new file mode 100644 index 00000000..7701724c --- /dev/null +++ b/src/include/flock/metrics/base_manager.hpp @@ -0,0 +1,298 @@ +#pragma once + +#include "flock/metrics/data_structures.hpp" +#include +#include +#include +#include +#include +#include +#include + +namespace flock { + +// Core metrics tracking functionality shared between scalar and aggregate functions +template +class BaseMetricsManager { +public: + ThreadMetrics& GetThreadMetrics(const StateId& state_id) { + const auto tid = std::this_thread::get_id(); + auto& thread_map = thread_metrics_[tid]; + + auto it = thread_map.find(state_id); + if (it != thread_map.end()) { + return it->second; + } + + return thread_map[state_id]; + } + + void RegisterThread(const StateId& state_id) { + GetThreadMetrics(state_id); + } + + // Initialize metrics tracking and assign registration order + void StartInvocation(const StateId& state_id, FunctionType type) { + RegisterThread(state_id); + + const auto tid = std::this_thread::get_id(); + ThreadFunctionKey thread_function_key{tid, type}; + + if (thread_function_counters_.find(thread_function_key) == thread_function_counters_.end()) { + thread_function_counters_[thread_function_key] = 0; + } + + StateFunctionKey state_function_key{state_id, type}; + if (state_function_registration_order_.find(state_function_key) == state_function_registration_order_.end()) { + thread_function_counters_[thread_function_key]++; + state_function_registration_order_[state_function_key] = 
thread_function_counters_[thread_function_key]; + } + + GetThreadMetrics(state_id).GetMetrics(type); + } + + // Store model name and provider (first call wins) + void SetModelInfo(const StateId& state_id, FunctionType type, const std::string& model_name, const std::string& provider) { + auto& thread_metrics = GetThreadMetrics(state_id); + auto& metrics = thread_metrics.GetMetrics(type); + if (metrics.model_name.empty()) { + metrics.model_name = model_name; + } + if (metrics.provider.empty()) { + metrics.provider = provider; + } + } + + // Add input and output tokens (accumulative) + void UpdateTokens(const StateId& state_id, FunctionType type, int64_t input, int64_t output) { + auto& thread_metrics = GetThreadMetrics(state_id); + auto& metrics = thread_metrics.GetMetrics(type); + metrics.input_tokens += input; + metrics.output_tokens += output; + } + + // Increment API call counter + void IncrementApiCalls(const StateId& state_id, FunctionType type) { + GetThreadMetrics(state_id).GetMetrics(type).api_calls++; + } + + // Add API duration in microseconds (accumulative) + void AddApiDuration(const StateId& state_id, FunctionType type, int64_t duration_us) { + GetThreadMetrics(state_id).GetMetrics(type).api_duration_us += duration_us; + } + + // Add execution time in microseconds (accumulative) + void AddExecutionTime(const StateId& state_id, FunctionType type, int64_t duration_us) { + GetThreadMetrics(state_id).GetMetrics(type).execution_time_us += duration_us; + } + + // Get flattened metrics structure (merged across threads) + nlohmann::json GetMetrics() const { + nlohmann::json result = nlohmann::json::object(); + + struct Key { + FunctionType function_type; + size_t registration_order; + + bool operator==(const Key& other) const { + return function_type == other.function_type && registration_order == other.registration_order; + } + }; + + struct KeyHash { + size_t operator()(const Key& k) const { + return std::hash{}(static_cast(k.function_type)) ^ + 
(std::hash{}(k.registration_order) << 1); + } + }; + + std::unordered_map merged_metrics; + + // Collect and merge metrics by (function_type, registration_order) + for (const auto& [tid, state_map]: thread_metrics_) { + for (const auto& [state_id, thread_metrics]: state_map) { + if (thread_metrics.IsEmpty()) { + continue; + } + + for (size_t i = 0; i < ThreadMetrics::NUM_FUNCTION_TYPES - 1; ++i) { + const auto function_type = static_cast(i); + const auto& metrics = thread_metrics.GetMetrics(function_type); + + if (!metrics.IsEmpty()) { + StateFunctionKey state_function_key{state_id, function_type}; + auto order_it = state_function_registration_order_.find(state_function_key); + size_t registration_order = (order_it != state_function_registration_order_.end()) + ? order_it->second + : SIZE_MAX; + + Key key{function_type, registration_order}; + + auto& merged = merged_metrics[key]; + merged.input_tokens += metrics.input_tokens; + merged.output_tokens += metrics.output_tokens; + merged.api_calls += metrics.api_calls; + merged.api_duration_us += metrics.api_duration_us; + merged.execution_time_us += metrics.execution_time_us; + + if (merged.model_name.empty() && !metrics.model_name.empty()) { + merged.model_name = metrics.model_name; + } + if (merged.provider.empty() && !metrics.provider.empty()) { + merged.provider = metrics.provider; + } + } + } + } + } + + struct MetricEntry { + FunctionType function_type; + size_t registration_order; + FunctionMetricsData metrics; + }; + + std::vector entries; + entries.reserve(merged_metrics.size()); + + for (const auto& [key, metrics]: merged_metrics) { + entries.push_back({key.function_type, key.registration_order, metrics}); + } + + std::sort(entries.begin(), entries.end(), [](const MetricEntry& a, const MetricEntry& b) { + if (a.function_type != b.function_type) { + return a.function_type < b.function_type; + } + return a.registration_order < b.registration_order; + }); + + std::unordered_map function_counters; + + for (const 
auto& entry: entries) { + if (function_counters.find(entry.function_type) == function_counters.end()) { + function_counters[entry.function_type] = 0; + } + + function_counters[entry.function_type]++; + const std::string key = std::string(FunctionTypeToString(entry.function_type)) + "_" + std::to_string(function_counters[entry.function_type]); + + result[key] = entry.metrics.ToJson(); + } + + return result; + } + + // Get nested metrics structure preserving thread/state info (for debugging) + nlohmann::json GetDebugMetrics() const { + nlohmann::json result; + nlohmann::json threads_json = nlohmann::json::object(); + + size_t threads_with_output = 0; + + for (const auto& [tid, state_map]: thread_metrics_) { + std::ostringstream oss; + oss << tid; + const std::string thread_id_str = oss.str(); + + nlohmann::json thread_data; + bool thread_has_output = false; + + for (const auto& [state_id, thread_metrics]: state_map) { + if (thread_metrics.IsEmpty()) { + continue; + } + + std::ostringstream state_oss; + state_oss << state_id; + const std::string state_id_str = state_oss.str(); + + nlohmann::json state_data; + + for (size_t i = 0; i < ThreadMetrics::NUM_FUNCTION_TYPES - 1; ++i) { + const auto function_type = static_cast(i); + const auto& metrics = thread_metrics.GetMetrics(function_type); + + if (!metrics.IsEmpty()) { + StateFunctionKey state_function_key{state_id, function_type}; + auto order_it = state_function_registration_order_.find(state_function_key); + size_t registration_order = (order_it != state_function_registration_order_.end()) + ? 
order_it->second + : 0; + + nlohmann::json function_data = metrics.ToJson(); + function_data["registration_order"] = registration_order; + state_data[FunctionTypeToString(function_type)] = std::move(function_data); + } + } + + if (!state_data.empty()) { + thread_has_output = true; + thread_data[state_id_str] = std::move(state_data); + } + } + + if (thread_has_output) { + threads_with_output++; + threads_json[thread_id_str] = std::move(thread_data); + } + } + + result["threads"] = threads_json.empty() ? nlohmann::json::object() : std::move(threads_json); + result["thread_count"] = threads_with_output; + return result; + } + + // Clear all metrics and registration tracking + void Reset() { + thread_metrics_.clear(); + state_function_registration_order_.clear(); + thread_function_counters_.clear(); + } + +protected: + // Main storage: thread_id -> state_id -> ThreadMetrics + std::unordered_map, ThreadIdHash> thread_metrics_; + + // Registration order tracking structures + struct ThreadFunctionKey { + std::thread::id thread_id; + FunctionType function_type; + + bool operator==(const ThreadFunctionKey& other) const { + return thread_id == other.thread_id && function_type == other.function_type; + } + }; + + struct ThreadFunctionKeyHash { + size_t operator()(const ThreadFunctionKey& k) const { + return ThreadIdHash{}(k.thread_id) ^ + (std::hash{}(static_cast(k.function_type)) << 1); + } + }; + + struct StateFunctionKey { + StateId state_id; + FunctionType function_type; + + bool operator==(const StateFunctionKey& other) const { + return state_id == other.state_id && function_type == other.function_type; + } + }; + + struct StateFunctionKeyHash { + size_t operator()(const StateFunctionKey& k) const { + size_t state_hash = 0; + if constexpr (std::is_pointer_v) { + state_hash = std::hash{}(reinterpret_cast(k.state_id)); + } else { + state_hash = std::hash{}(k.state_id); + } + return state_hash ^ (std::hash{}(static_cast(k.function_type)) << 1); + } + }; + + 
std::unordered_map state_function_registration_order_; + std::unordered_map thread_function_counters_; +}; + +}// namespace flock diff --git a/src/include/flock/metrics/context.hpp b/src/include/flock/metrics/context.hpp new file mode 100644 index 00000000..3e8487da --- /dev/null +++ b/src/include/flock/metrics/context.hpp @@ -0,0 +1,45 @@ +#pragma once + +#include "duckdb/main/database.hpp" +#include "flock/metrics/types.hpp" + +namespace flock { + +// Thread-local storage for metrics context (legacy, not used in function code) +class MetricsContext { +public: + static void SetWithDatabase(duckdb::DatabaseInstance* db, const void* state_id, FunctionType type) noexcept { + current_database_ = db; + current_state_id_ = state_id; + current_function_ = type; + } + + static void Clear() noexcept { + current_database_ = nullptr; + current_state_id_ = nullptr; + current_function_ = FunctionType::UNKNOWN; + } + + static duckdb::DatabaseInstance* GetDatabase() noexcept { + return current_database_; + } + + static const void* GetStateId() noexcept { + return current_state_id_; + } + + static FunctionType GetFunctionType() noexcept { + return current_function_; + } + + static bool IsActive() noexcept { + return current_database_ != nullptr && current_state_id_ != nullptr && current_function_ != FunctionType::UNKNOWN; + } + +private: + static thread_local duckdb::DatabaseInstance* current_database_; + static thread_local const void* current_state_id_; + static thread_local FunctionType current_function_; +}; + +}// namespace flock diff --git a/src/include/flock/metrics/data_structures.hpp b/src/include/flock/metrics/data_structures.hpp new file mode 100644 index 00000000..4ae30316 --- /dev/null +++ b/src/include/flock/metrics/data_structures.hpp @@ -0,0 +1,97 @@ +#pragma once + +#include "flock/metrics/types.hpp" +#include +#include +#include +#include +#include + +namespace flock { + +// Stores aggregated metrics for a single function call +struct FunctionMetricsData { + 
std::string model_name; + std::string provider; + int64_t input_tokens = 0; + int64_t output_tokens = 0; + int64_t api_calls = 0; + int64_t api_duration_us = 0; + int64_t execution_time_us = 0; + + int64_t total_tokens() const noexcept { + return input_tokens + output_tokens; + } + + double api_duration_ms() const noexcept { + return api_duration_us / 1000.0; + } + + double execution_time_ms() const noexcept { + return execution_time_us / 1000.0; + } + + bool IsEmpty() const noexcept { + return input_tokens == 0 && output_tokens == 0 && api_calls == 0 && + api_duration_us == 0 && execution_time_us == 0; + } + + nlohmann::json ToJson() const { + nlohmann::json result = { + {"input_tokens", input_tokens}, + {"output_tokens", output_tokens}, + {"total_tokens", total_tokens()}, + {"api_calls", api_calls}, + {"api_duration_ms", api_duration_ms()}, + {"execution_time_ms", execution_time_ms()}}; + + if (!model_name.empty()) { + result["model_name"] = model_name; + } + if (!provider.empty()) { + result["provider"] = provider; + } + + return result; + } +}; + +// Stores metrics for all function types in a single state +class ThreadMetrics { +public: + static constexpr size_t NUM_FUNCTION_TYPES = 8; + + void Reset() noexcept { + for (auto& func_metrics: by_function_) { + func_metrics = FunctionMetricsData{}; + } + } + + FunctionMetricsData& GetMetrics(FunctionType type) { + return by_function_[FunctionTypeToIndex(type)]; + } + + const FunctionMetricsData& GetMetrics(FunctionType type) const noexcept { + return by_function_[FunctionTypeToIndex(type)]; + } + + bool IsEmpty() const noexcept { + for (const auto& func_metrics: by_function_) { + if (!func_metrics.IsEmpty()) { + return false; + } + } + return true; + } + +private: + FunctionMetricsData by_function_[NUM_FUNCTION_TYPES]; +}; + +struct ThreadIdHash { + size_t operator()(const std::thread::id& id) const noexcept { + return std::hash{}(id); + } +}; + +}// namespace flock diff --git 
a/src/include/flock/metrics/manager.hpp b/src/include/flock/metrics/manager.hpp new file mode 100644 index 00000000..6c716f2a --- /dev/null +++ b/src/include/flock/metrics/manager.hpp @@ -0,0 +1,107 @@ +#pragma once + +#include "duckdb/function/scalar_function.hpp" +#include "duckdb/main/database.hpp" +#include "flock/metrics/base_manager.hpp" +#include "flock/metrics/types.hpp" +#include +#include + +namespace flock { + +// Database-level metrics storage and unified API for scalar and aggregate functions +class MetricsManager : public BaseMetricsManager { +public: + // Get metrics manager for a database instance (creates if needed) + static MetricsManager& GetForDatabase(duckdb::DatabaseInstance* db) { + if (db == nullptr) { + throw std::runtime_error("Database instance is null"); + } + + static std::unordered_map> db_managers; + + auto it = db_managers.find(db); + if (it == db_managers.end()) { + auto manager = std::make_unique(); + auto* manager_ptr = manager.get(); + db_managers[db] = std::move(manager); + return *manager_ptr; + } + return *it->second; + } + + // Initialize metrics tracking (stores context for subsequent calls) + static void StartInvocation(duckdb::DatabaseInstance* db, const void* state_id, FunctionType type) { + if (db != nullptr && state_id != nullptr) { + current_db_ = db; + current_state_id_ = state_id; + current_function_type_ = type; + + auto& manager = GetForDatabase(db); + manager.RegisterThread(state_id); + manager.BaseMetricsManager::StartInvocation(state_id, type); + } + } + + // Record model name and provider + static void SetModelInfo(const std::string& model_name, const std::string& provider) { + if (current_db_ != nullptr && current_state_id_ != nullptr) { + auto& manager = GetForDatabase(current_db_); + manager.BaseMetricsManager::SetModelInfo(current_state_id_, current_function_type_, model_name, provider); + } + } + + // Record token usage (accumulative) + static void UpdateTokens(int64_t input, int64_t output) { + if 
(current_db_ != nullptr && current_state_id_ != nullptr) { + auto& manager = GetForDatabase(current_db_); + manager.BaseMetricsManager::UpdateTokens(current_state_id_, current_function_type_, input, output); + } + } + + // Increment API call counter + static void IncrementApiCalls() { + if (current_db_ != nullptr && current_state_id_ != nullptr) { + auto& manager = GetForDatabase(current_db_); + manager.BaseMetricsManager::IncrementApiCalls(current_state_id_, current_function_type_); + } + } + + // Record API call duration in milliseconds (accumulative) + static void AddApiDuration(double duration_ms) { + if (current_db_ != nullptr && current_state_id_ != nullptr) { + const int64_t duration_us = static_cast(duration_ms * 1000.0); + auto& manager = GetForDatabase(current_db_); + manager.BaseMetricsManager::AddApiDuration(current_state_id_, current_function_type_, duration_us); + } + } + + // Record execution time in milliseconds (accumulative) + static void AddExecutionTime(double duration_ms) { + if (current_db_ != nullptr && current_state_id_ != nullptr) { + const int64_t duration_us = static_cast(duration_ms * 1000.0); + auto& manager = GetForDatabase(current_db_); + manager.BaseMetricsManager::AddExecutionTime(current_state_id_, current_function_type_, duration_us); + } + } + + // Clear stored context (optional, auto-cleared on next StartInvocation) + static void ClearContext() { + current_db_ = nullptr; + current_state_id_ = nullptr; + current_function_type_ = FunctionType::UNKNOWN; + } + + // SQL function implementations + static void ExecuteGetMetrics(duckdb::DataChunk& args, duckdb::ExpressionState& state, duckdb::Vector& result); + static void ExecuteGetDebugMetrics(duckdb::DataChunk& args, duckdb::ExpressionState& state, duckdb::Vector& result); + static void ExecuteResetMetrics(duckdb::DataChunk& args, duckdb::ExpressionState& state, duckdb::Vector& result); + +private: + // Thread-local storage for current metrics context + static thread_local 
duckdb::DatabaseInstance* current_db_; + static thread_local const void* current_state_id_; + static thread_local FunctionType current_function_type_; +}; + +}// namespace flock diff --git a/src/include/flock/metrics/types.hpp b/src/include/flock/metrics/types.hpp new file mode 100644 index 00000000..e7c24c7a --- /dev/null +++ b/src/include/flock/metrics/types.hpp @@ -0,0 +1,44 @@ +#pragma once + +#include +#include + +namespace flock { + +enum class FunctionType : uint8_t { + LLM_COMPLETE = 0, + LLM_FILTER = 1, + LLM_EMBEDDING = 2, + LLM_REDUCE = 3, + LLM_RERANK = 4, + LLM_FIRST = 5, + LLM_LAST = 6, + UNKNOWN = 7 +}; + +inline constexpr const char* FunctionTypeToString(FunctionType type) noexcept { + switch (type) { + case FunctionType::LLM_COMPLETE: + return "llm_complete"; + case FunctionType::LLM_FILTER: + return "llm_filter"; + case FunctionType::LLM_EMBEDDING: + return "llm_embedding"; + case FunctionType::LLM_REDUCE: + return "llm_reduce"; + case FunctionType::LLM_RERANK: + return "llm_rerank"; + case FunctionType::LLM_FIRST: + return "llm_first"; + case FunctionType::LLM_LAST: + return "llm_last"; + default: + return "unknown"; + } +} + +inline constexpr size_t FunctionTypeToIndex(FunctionType type) noexcept { + return static_cast(type); +} + +}// namespace flock diff --git a/src/metrics/metrics.cpp b/src/metrics/metrics.cpp new file mode 100644 index 00000000..7ec2f174 --- /dev/null +++ b/src/metrics/metrics.cpp @@ -0,0 +1,50 @@ +#include "flock/metrics/manager.hpp" + +namespace flock { + +// Thread-local storage definitions (must be in .cpp file) +thread_local duckdb::DatabaseInstance* MetricsManager::current_db_ = nullptr; +thread_local const void* MetricsManager::current_state_id_ = nullptr; +thread_local FunctionType MetricsManager::current_function_type_ = FunctionType::UNKNOWN; + +void MetricsManager::ExecuteGetMetrics(duckdb::DataChunk& args, duckdb::ExpressionState& state, duckdb::Vector& result) { + auto& context = state.GetContext(); + auto* db = 
context.db.get(); + + auto& metrics_manager = GetForDatabase(db); + auto metrics = metrics_manager.GetMetrics(); + + auto json_str = metrics.dump(); + + result.SetVectorType(duckdb::VectorType::CONSTANT_VECTOR); + auto result_data = duckdb::ConstantVector::GetData(result); + result_data[0] = duckdb::StringVector::AddString(result, json_str); +} + +void MetricsManager::ExecuteGetDebugMetrics(duckdb::DataChunk& args, duckdb::ExpressionState& state, duckdb::Vector& result) { + auto& context = state.GetContext(); + auto* db = context.db.get(); + + auto& metrics_manager = GetForDatabase(db); + auto metrics = metrics_manager.GetDebugMetrics(); + + auto json_str = metrics.dump(); + + result.SetVectorType(duckdb::VectorType::CONSTANT_VECTOR); + auto result_data = duckdb::ConstantVector::GetData(result); + result_data[0] = duckdb::StringVector::AddString(result, json_str); +} + +void MetricsManager::ExecuteResetMetrics(duckdb::DataChunk& args, duckdb::ExpressionState& state, duckdb::Vector& result) { + auto& context = state.GetContext(); + auto* db = context.db.get(); + + auto& metrics_manager = GetForDatabase(db); + metrics_manager.Reset(); + + result.SetVectorType(duckdb::VectorType::CONSTANT_VECTOR); + auto result_data = duckdb::ConstantVector::GetData(result); + result_data[0] = duckdb::StringVector::AddString(result, "Metrics reset successfully"); +} + +}// namespace flock diff --git a/test/integration/src/integration/tests/functions/scalar/test_metrics.py b/test/integration/src/integration/tests/functions/scalar/test_metrics.py new file mode 100644 index 00000000..c42101b0 --- /dev/null +++ b/test/integration/src/integration/tests/functions/scalar/test_metrics.py @@ -0,0 +1,91 @@ +import pytest +from integration.conftest import run_cli + + +@pytest.fixture(params=[("gpt-4o-mini", "openai"), ("llama3.2", "ollama")]) +def model_config(request): + return request.param + + +def test_flock_get_metrics_returns_json(integration_setup): + duckdb_cli_path, db_path = 
integration_setup + query = "SELECT flock_get_metrics() AS metrics;" + result = run_cli(duckdb_cli_path, db_path, query) + + assert result.returncode == 0, f"Query failed with error: {result.stderr}" + assert "metrics" in result.stdout.lower() + assert "total_input_tokens" in result.stdout.lower() + assert "total_output_tokens" in result.stdout.lower() + assert "total_api_calls" in result.stdout.lower() + + +def test_flock_reset_metrics(integration_setup): + duckdb_cli_path, db_path = integration_setup + query = "SELECT flock_reset_metrics() AS result;" + result = run_cli(duckdb_cli_path, db_path, query) + + assert result.returncode == 0, f"Query failed with error: {result.stderr}" + assert "reset" in result.stdout.lower() + + +def test_metrics_after_llm_complete(integration_setup, model_config): + duckdb_cli_path, db_path = integration_setup + model_name, provider = model_config + + run_cli(duckdb_cli_path, db_path, "SELECT flock_reset_metrics();") + + test_model_name = f"test-metrics-model_{model_name}" + create_model_query = ( + f"CREATE MODEL('{test_model_name}', '{model_name}', '{provider}');" + ) + run_cli(duckdb_cli_path, db_path, create_model_query) + + query = ( + """ + SELECT llm_complete( + {'model_name': '""" + + test_model_name + + """'}, + {'prompt': 'What is 2+2?'} + ) AS result; + """ + ) + run_cli(duckdb_cli_path, db_path, query) + + metrics_query = "SELECT flock_get_metrics() AS metrics;" + metrics_result = run_cli(duckdb_cli_path, db_path, metrics_query) + + assert metrics_result.returncode == 0 + assert "total_api_calls" in metrics_result.stdout.lower() + + +def test_metrics_reset_clears_counters(integration_setup, model_config): + duckdb_cli_path, db_path = integration_setup + model_name, provider = model_config + + test_model_name = f"test-reset-model_{model_name}" + create_model_query = ( + f"CREATE MODEL('{test_model_name}', '{model_name}', '{provider}');" + ) + run_cli(duckdb_cli_path, db_path, create_model_query) + + query = ( + """ + 
SELECT llm_complete( + {'model_name': '""" + + test_model_name + + """'}, + {'prompt': 'Say hello'} + ) AS result; + """ + ) + run_cli(duckdb_cli_path, db_path, query) + + run_cli(duckdb_cli_path, db_path, "SELECT flock_reset_metrics();") + + metrics_query = "SELECT flock_get_metrics() AS metrics;" + metrics_result = run_cli(duckdb_cli_path, db_path, metrics_query) + + assert metrics_result.returncode == 0 + output = metrics_result.stdout.lower() + assert "total_api_calls" in output and ":0" in output.replace(" ", "") From b90eac70deae46259730567e6474b968ebff51b4 Mon Sep 17 00:00:00 2001 From: Anas Dorbani Date: Sat, 6 Dec 2025 15:50:56 -0500 Subject: [PATCH 06/59] Removed old metrics wrapper --- src/include/flock/metrics/metrics.hpp | 77 --------------------------- src/metrics/implementation.cpp | 15 ------ 2 files changed, 92 deletions(-) delete mode 100644 src/include/flock/metrics/metrics.hpp delete mode 100644 src/metrics/implementation.cpp diff --git a/src/include/flock/metrics/metrics.hpp b/src/include/flock/metrics/metrics.hpp deleted file mode 100644 index 90896a3a..00000000 --- a/src/include/flock/metrics/metrics.hpp +++ /dev/null @@ -1,77 +0,0 @@ -#pragma once - -#include "duckdb/function/scalar_function.hpp" -#include -#include -#include - -namespace flock { - -class FlockMetrics { -public: - static FlockMetrics& GetInstance() { - static FlockMetrics instance; - return instance; - } - - FlockMetrics(const FlockMetrics&) = delete; - FlockMetrics& operator=(const FlockMetrics&) = delete; - - void UpdateTokenUsage(int64_t input_tokens, int64_t output_tokens) { - std::lock_guard lock(mutex_); - total_input_tokens_ += input_tokens; - total_output_tokens_ += output_tokens; - } - - void IncrementApiCalls() { - std::lock_guard lock(mutex_); - total_api_calls_++; - } - - void AddApiDuration(double duration_ms) { - std::lock_guard lock(mutex_); - total_api_duration_ms_ += duration_ms; - } - - void AddExecutionTime(double execution_time_ms) { - std::lock_guard 
lock(mutex_); - total_execution_time_ms_ += execution_time_ms; - } - - nlohmann::json GetMetrics() const { - std::lock_guard lock(mutex_); - return { - {"total_input_tokens", total_input_tokens_}, - {"total_output_tokens", total_output_tokens_}, - {"total_tokens", total_input_tokens_ + total_output_tokens_}, - {"total_api_calls", total_api_calls_}, - {"total_api_duration_ms", total_api_duration_ms_}, - {"total_execution_time_ms", total_execution_time_ms_}, - {"avg_api_duration_ms", total_api_calls_ > 0 ? total_api_duration_ms_ / total_api_calls_ : 0.0}, - {"avg_execution_time_ms", total_api_calls_ > 0 ? total_execution_time_ms_ / total_api_calls_ : 0.0}}; - } - - void Reset() { - std::lock_guard lock(mutex_); - total_input_tokens_ = 0; - total_output_tokens_ = 0; - total_api_calls_ = 0; - total_api_duration_ms_ = 0.0; - total_execution_time_ms_ = 0.0; - } - - static void ExecuteGetMetrics(duckdb::DataChunk& args, duckdb::ExpressionState& state, duckdb::Vector& result); - static void ExecuteResetMetrics(duckdb::DataChunk& args, duckdb::ExpressionState& state, duckdb::Vector& result); - -private: - FlockMetrics() = default; - - mutable std::mutex mutex_; - int64_t total_input_tokens_ = 0; - int64_t total_output_tokens_ = 0; - int64_t total_api_calls_ = 0; - double total_api_duration_ms_ = 0.0; - double total_execution_time_ms_ = 0.0; -}; - -}// namespace flock diff --git a/src/metrics/implementation.cpp b/src/metrics/implementation.cpp deleted file mode 100644 index 3642dc57..00000000 --- a/src/metrics/implementation.cpp +++ /dev/null @@ -1,15 +0,0 @@ -#include "flock/metrics/metrics.hpp" - -namespace flock { - -void FlockMetrics::ExecuteGetMetrics(duckdb::DataChunk& args, duckdb::ExpressionState& state, duckdb::Vector& result) { - auto json_str = GetInstance().GetMetrics().dump(); - result.SetValue(0, duckdb::Value(json_str)); -} - -void FlockMetrics::ExecuteResetMetrics(duckdb::DataChunk& args, duckdb::ExpressionState& state, duckdb::Vector& result) { - 
GetInstance().Reset(); - result.SetValue(0, duckdb::Value("Metrics reset successfully")); -} - -}// namespace flock From 2b80454960479835e007daa8d0b4f5b171778bde Mon Sep 17 00:00:00 2001 From: Anas Dorbani Date: Sat, 6 Dec 2025 15:51:08 -0500 Subject: [PATCH 07/59] Updated metrics registry --- src/functions/aggregate/aggregate.cpp | 3 -- src/functions/aggregate/aggregate_state.cpp | 6 +++ .../llm_first_or_last/implementation.cpp | 37 ++++++++++++++----- .../aggregate/llm_reduce/implementation.cpp | 33 ++++++++++++----- .../aggregate/llm_rerank/implementation.cpp | 34 ++++++++++++----- .../scalar/llm_embedding/implementation.cpp | 19 ++++++++-- .../scalar/llm_filter/implementation.cpp | 20 ++++++++-- .../flock/functions/aggregate/aggregate.hpp | 25 +++++++++---- src/include/flock/registry/scalar.hpp | 1 + src/metrics/registry.cpp | 24 +++++++++--- src/registry/scalar.cpp | 1 + 11 files changed, 150 insertions(+), 53 deletions(-) diff --git a/src/functions/aggregate/aggregate.cpp b/src/functions/aggregate/aggregate.cpp index f17cfc62..5f7c3fb0 100644 --- a/src/functions/aggregate/aggregate.cpp +++ b/src/functions/aggregate/aggregate.cpp @@ -2,9 +2,6 @@ namespace flock { -nlohmann::json AggregateFunctionBase::model_details; -std::string AggregateFunctionBase::user_query; - void AggregateFunctionBase::ValidateArguments(duckdb::Vector inputs[], idx_t input_count) { if (input_count != 3) { throw std::runtime_error("Expected exactly 3 arguments for aggregate function, got " + std::to_string(input_count)); diff --git a/src/functions/aggregate/aggregate_state.cpp b/src/functions/aggregate/aggregate_state.cpp index 9959af35..3f50d922 100644 --- a/src/functions/aggregate/aggregate_state.cpp +++ b/src/functions/aggregate/aggregate_state.cpp @@ -37,6 +37,12 @@ void AggregateFunctionState::Combine(const AggregateFunctionState& source) { Initialize(); } + // Copy model_details and user_query from source if not already set + if (model_details.empty() && 
!source.model_details.empty()) { + model_details = source.model_details; + user_query = source.user_query; + } + if (source.value) { auto idx = 0u; for (auto& column: *source.value) { diff --git a/src/functions/aggregate/llm_first_or_last/implementation.cpp b/src/functions/aggregate/llm_first_or_last/implementation.cpp index 485c0069..98d052be 100644 --- a/src/functions/aggregate/llm_first_or_last/implementation.cpp +++ b/src/functions/aggregate/llm_first_or_last/implementation.cpp @@ -1,5 +1,6 @@ +#include "flock/core/config.hpp" #include "flock/functions/aggregate/llm_first_or_last.hpp" -#include "flock/metrics/metrics.hpp" +#include "flock/metrics/manager.hpp" #include @@ -78,16 +79,30 @@ nlohmann::json LlmFirstOrLast::Evaluate(nlohmann::json& tuples) { void LlmFirstOrLast::FinalizeResults(duckdb::Vector& states, duckdb::AggregateInputData& aggr_input_data, duckdb::Vector& result, idx_t count, idx_t offset, AggregateFunctionType function_type) { - // Start execution timing - auto exec_start = std::chrono::high_resolution_clock::now(); - const auto states_vector = reinterpret_cast(duckdb::FlatVector::GetData(states)); + // Map AggregateFunctionType to FunctionType + FunctionType metrics_function_type = (function_type == AggregateFunctionType::FIRST) ? 
FunctionType::LLM_FIRST : FunctionType::LLM_LAST; + for (idx_t i = 0; i < count; i++) { auto idx = i + offset; auto* state = states_vector[idx]; if (state && !state->value->empty()) { + // Use model_details and user_query from the state (not static variables) + Model model(state->model_details); + auto model_details_obj = model.GetModelDetails(); + + // Get database instance and state ID for metrics + auto db = Config::db; + const void* state_id = static_cast(state); + + // Start metrics tracking + MetricsManager::StartInvocation(db, state_id, metrics_function_type); + MetricsManager::SetModelInfo(model_details_obj.model_name, model_details_obj.provider_name); + + auto exec_start = std::chrono::high_resolution_clock::now(); + auto tuples_with_ids = *state->value; tuples_with_ids.push_back(nlohmann::json::object()); for (auto j = 0; j < static_cast((*state->value)[0]["data"].size()); j++) { @@ -99,17 +114,19 @@ void LlmFirstOrLast::FinalizeResults(duckdb::Vector& states, duckdb::AggregateIn } LlmFirstOrLast function_instance; function_instance.function_type = function_type; + function_instance.user_query = state->user_query; + function_instance.model_details = state->model_details; auto response = function_instance.Evaluate(tuples_with_ids); + + auto exec_end = std::chrono::high_resolution_clock::now(); + double exec_duration_ms = std::chrono::duration(exec_end - exec_start).count(); + MetricsManager::AddExecutionTime(exec_duration_ms); + result.SetValue(idx, response.dump()); } else { - result.SetValue(idx, nullptr);// Empty JSON object for null/empty states + result.SetValue(idx, nullptr); } } - - // End execution timing and update metrics - auto exec_end = std::chrono::high_resolution_clock::now(); - double exec_duration_ms = std::chrono::duration(exec_end - exec_start).count(); - FlockMetrics::GetInstance().AddExecutionTime(exec_duration_ms); } }// namespace flock diff --git a/src/functions/aggregate/llm_reduce/implementation.cpp 
b/src/functions/aggregate/llm_reduce/implementation.cpp index 44ba02ab..2d050de8 100644 --- a/src/functions/aggregate/llm_reduce/implementation.cpp +++ b/src/functions/aggregate/llm_reduce/implementation.cpp @@ -1,5 +1,6 @@ +#include "flock/core/config.hpp" #include "flock/functions/aggregate/llm_reduce.hpp" -#include "flock/metrics/metrics.hpp" +#include "flock/metrics/manager.hpp" #include @@ -67,9 +68,6 @@ nlohmann::json LlmReduce::ReduceLoop(const nlohmann::json& tuples, void LlmReduce::FinalizeResults(duckdb::Vector& states, duckdb::AggregateInputData& aggr_input_data, duckdb::Vector& result, idx_t count, idx_t offset, const AggregateFunctionType function_type) { - // Start execution timing - auto exec_start = std::chrono::high_resolution_clock::now(); - const auto states_vector = reinterpret_cast(duckdb::FlatVector::GetData(states)); for (idx_t i = 0; i < count; i++) { @@ -77,9 +75,29 @@ void LlmReduce::FinalizeResults(duckdb::Vector& states, duckdb::AggregateInputDa auto* state = states_vector[idx]; if (state && !state->value->empty()) { + // Use model_details and user_query from the state (not static variables) + Model model(state->model_details); + auto model_details_obj = model.GetModelDetails(); + + // Get database instance and state ID for metrics + auto db = Config::db; + const void* state_id = static_cast(state); + + // Start metrics tracking + MetricsManager::StartInvocation(db, state_id, FunctionType::LLM_REDUCE); + MetricsManager::SetModelInfo(model_details_obj.model_name, model_details_obj.provider_name); + + auto exec_start = std::chrono::high_resolution_clock::now(); + LlmReduce reduce_instance; - reduce_instance.model = Model(model_details); + reduce_instance.model = Model(state->model_details); + reduce_instance.user_query = state->user_query; auto response = reduce_instance.ReduceLoop(*state->value, function_type); + + auto exec_end = std::chrono::high_resolution_clock::now(); + double exec_duration_ms = std::chrono::duration(exec_end - 
exec_start).count(); + MetricsManager::AddExecutionTime(exec_duration_ms); + if (response.is_string()) { result.SetValue(idx, response.get()); } else { @@ -89,11 +107,6 @@ void LlmReduce::FinalizeResults(duckdb::Vector& states, duckdb::AggregateInputDa result.SetValue(idx, nullptr);// Empty result for null/empty states } } - - // End execution timing and update metrics - auto exec_end = std::chrono::high_resolution_clock::now(); - double exec_duration_ms = std::chrono::duration(exec_end - exec_start).count(); - FlockMetrics::GetInstance().AddExecutionTime(exec_duration_ms); } }// namespace flock diff --git a/src/functions/aggregate/llm_rerank/implementation.cpp b/src/functions/aggregate/llm_rerank/implementation.cpp index c43b17f1..fc261eed 100644 --- a/src/functions/aggregate/llm_rerank/implementation.cpp +++ b/src/functions/aggregate/llm_rerank/implementation.cpp @@ -1,5 +1,6 @@ +#include "flock/core/config.hpp" #include "flock/functions/aggregate/llm_rerank.hpp" -#include "flock/metrics/metrics.hpp" +#include "flock/metrics/manager.hpp" #include @@ -117,9 +118,6 @@ nlohmann::json LlmRerank::SlidingWindow(nlohmann::json& tuples) { void LlmRerank::Finalize(duckdb::Vector& states, duckdb::AggregateInputData& aggr_input_data, duckdb::Vector& result, idx_t count, idx_t offset) { - // Start execution timing - auto exec_start = std::chrono::high_resolution_clock::now(); - const auto states_vector = reinterpret_cast(duckdb::FlatVector::GetData(states)); for (idx_t i = 0; i < count; i++) { @@ -127,22 +125,38 @@ void LlmRerank::Finalize(duckdb::Vector& states, duckdb::AggregateInputData& agg auto* state = states_vector[idx]; if (state && !state->value->empty()) { + // Use model_details and user_query from the state (not static variables) + Model model(state->model_details); + auto model_details_obj = model.GetModelDetails(); + + // Get database instance and state ID for metrics + auto db = Config::db; + const void* state_id = static_cast(state); + + // Start metrics 
tracking + MetricsManager::StartInvocation(db, state_id, FunctionType::LLM_RERANK); + MetricsManager::SetModelInfo(model_details_obj.model_name, model_details_obj.provider_name); + + auto exec_start = std::chrono::high_resolution_clock::now(); + auto tuples_with_ids = nlohmann::json::array(); for (auto j = 0; j < static_cast(state->value->size()); j++) { tuples_with_ids.push_back((*state->value)[j]); } LlmRerank function_instance; + function_instance.user_query = state->user_query; + function_instance.model_details = state->model_details; auto reranked_tuples = function_instance.SlidingWindow(tuples_with_ids); + + auto exec_end = std::chrono::high_resolution_clock::now(); + double exec_duration_ms = std::chrono::duration(exec_end - exec_start).count(); + MetricsManager::AddExecutionTime(exec_duration_ms); + result.SetValue(idx, reranked_tuples.dump()); } else { - result.SetValue(idx, nullptr);// Empty result for null/empty states + result.SetValue(idx, nullptr); } } - - // End execution timing and update metrics - auto exec_end = std::chrono::high_resolution_clock::now(); - double exec_duration_ms = std::chrono::duration(exec_end - exec_start).count(); - FlockMetrics::GetInstance().AddExecutionTime(exec_duration_ms); } }// namespace flock diff --git a/src/functions/scalar/llm_embedding/implementation.cpp b/src/functions/scalar/llm_embedding/implementation.cpp index bd4eebdd..38dde7e1 100644 --- a/src/functions/scalar/llm_embedding/implementation.cpp +++ b/src/functions/scalar/llm_embedding/implementation.cpp @@ -1,5 +1,6 @@ +#include "flock/core/config.hpp" #include "flock/functions/scalar/llm_embedding.hpp" -#include "flock/metrics/metrics.hpp" +#include "flock/metrics/manager.hpp" #include @@ -35,6 +36,10 @@ std::vector> LlmEmbedding::Operation(duckdb::DataC auto model_details_json = CastVectorOfStructsToJson(args.data[0], 1); Model model(model_details_json); + // Set model name and provider in metrics (context is already set in Execute) + auto model_details = 
model.GetModelDetails(); + MetricsManager::SetModelInfo(model_details.model_name, model_details.provider_name); + std::vector prepared_inputs; auto num_rows = inputs["context_columns"][0]["data"].size(); for (size_t row_idx = 0; row_idx < num_rows; row_idx++) { @@ -74,7 +79,14 @@ std::vector> LlmEmbedding::Operation(duckdb::DataC } void LlmEmbedding::Execute(duckdb::DataChunk& args, duckdb::ExpressionState& state, duckdb::Vector& result) { - // Start execution timing + // Get database instance and state ID for metrics + auto& context = state.GetContext(); + auto* db = context.db.get(); + const void* state_id = static_cast(&state); + + // Start metrics tracking + MetricsManager::StartInvocation(db, state_id, FunctionType::LLM_EMBEDDING); + auto exec_start = std::chrono::high_resolution_clock::now(); auto results = LlmEmbedding::Operation(args); @@ -84,10 +96,9 @@ void LlmEmbedding::Execute(duckdb::DataChunk& args, duckdb::ExpressionState& sta result.SetValue(index++, duckdb::Value::LIST(res)); } - // End execution timing and update metrics auto exec_end = std::chrono::high_resolution_clock::now(); double exec_duration_ms = std::chrono::duration(exec_end - exec_start).count(); - FlockMetrics::GetInstance().AddExecutionTime(exec_duration_ms); + MetricsManager::AddExecutionTime(exec_duration_ms); } }// namespace flock diff --git a/src/functions/scalar/llm_filter/implementation.cpp b/src/functions/scalar/llm_filter/implementation.cpp index fd61b1d2..fbd8640d 100644 --- a/src/functions/scalar/llm_filter/implementation.cpp +++ b/src/functions/scalar/llm_filter/implementation.cpp @@ -1,5 +1,6 @@ +#include "flock/core/config.hpp" #include "flock/functions/scalar/llm_filter.hpp" -#include "flock/metrics/metrics.hpp" +#include "flock/metrics/manager.hpp" #include @@ -27,6 +28,11 @@ std::vector LlmFilter::Operation(duckdb::DataChunk& args) { auto model_details_json = CastVectorOfStructsToJson(args.data[0], 1); Model model(model_details_json); + + // Set model name and provider 
in metrics (context is already set in Execute) + auto model_details = model.GetModelDetails(); + MetricsManager::SetModelInfo(model_details.model_name, model_details.provider_name); + auto prompt_context_json = CastVectorOfStructsToJson(args.data[1], args.size()); auto context_columns = nlohmann::json::array(); if (prompt_context_json.contains("context_columns")) { @@ -51,7 +57,14 @@ std::vector LlmFilter::Operation(duckdb::DataChunk& args) { } void LlmFilter::Execute(duckdb::DataChunk& args, duckdb::ExpressionState& state, duckdb::Vector& result) { - // Start execution timing + // Get database instance and state ID for metrics + auto& context = state.GetContext(); + auto* db = context.db.get(); + const void* state_id = static_cast(&state); + + // Start metrics tracking + MetricsManager::StartInvocation(db, state_id, FunctionType::LLM_FILTER); + auto exec_start = std::chrono::high_resolution_clock::now(); const auto results = LlmFilter::Operation(args); @@ -61,10 +74,9 @@ void LlmFilter::Execute(duckdb::DataChunk& args, duckdb::ExpressionState& state, result.SetValue(index++, duckdb::Value(res)); } - // End execution timing and update metrics auto exec_end = std::chrono::high_resolution_clock::now(); double exec_duration_ms = std::chrono::duration(exec_end - exec_start).count(); - FlockMetrics::GetInstance().AddExecutionTime(exec_duration_ms); + MetricsManager::AddExecutionTime(exec_duration_ms); } }// namespace flock diff --git a/src/include/flock/functions/aggregate/aggregate.hpp b/src/include/flock/functions/aggregate/aggregate.hpp index 180a3b2c..efa93436 100644 --- a/src/include/flock/functions/aggregate/aggregate.hpp +++ b/src/include/flock/functions/aggregate/aggregate.hpp @@ -5,6 +5,7 @@ #include "flock/core/common.hpp" #include "flock/functions/input_parser.hpp" +#include "flock/metrics/manager.hpp" #include "flock/model_manager/model.hpp" namespace flock { @@ -13,8 +14,10 @@ class AggregateFunctionState { public: nlohmann::basic_json<>* value; bool 
initialized; + nlohmann::json model_details; + std::string user_query; - AggregateFunctionState() : value(nullptr), initialized(false) {} + AggregateFunctionState() : value(nullptr), initialized(false), model_details(nlohmann::json::object()), user_query("") {} ~AggregateFunctionState() { if (value) { @@ -32,8 +35,8 @@ class AggregateFunctionState { class AggregateFunctionBase { public: Model model; - static nlohmann::json model_details; - static std::string user_query; + std::string user_query; + nlohmann::json model_details; public: explicit AggregateFunctionBase() = default; @@ -64,8 +67,7 @@ class AggregateFunctionBase { // ValidateArguments(inputs, input_count); auto [model_details_json, prompt_details, columns] = CastInputsToJson(inputs, count); - model_details = model_details_json; - user_query = PromptManager::CreatePromptDetails(prompt_details).prompt; + auto prompt_str = PromptManager::CreatePromptDetails(prompt_details).prompt; auto state_map_p = reinterpret_cast(duckdb::FlatVector::GetData(states)); @@ -86,6 +88,11 @@ class AggregateFunctionBase { } if (state) { + // Store model_details and user_query in the state (only set once, on first update) + if (state->model_details.empty()) { + state->model_details = model_details_json; + state->user_query = prompt_str; + } state->Update(tuple); } } @@ -97,10 +104,14 @@ class AggregateFunctionBase { // ValidateArguments(inputs, input_count); auto [model_details_json, prompt_details, tuples] = CastInputsToJson(inputs, count); - model_details = model_details_json; - user_query = PromptManager::CreatePromptDetails(prompt_details).prompt; + auto prompt_str = PromptManager::CreatePromptDetails(prompt_details).prompt; if (const auto state = reinterpret_cast(state_p)) { + // Store model_details and user_query in the state (only set once, on first update) + if (state->model_details.empty()) { + state->model_details = model_details_json; + state->user_query = prompt_str; + } state->Update(tuples); } } diff --git 
a/src/include/flock/registry/scalar.hpp b/src/include/flock/registry/scalar.hpp index 2518b7d1..084690c1 100644 --- a/src/include/flock/registry/scalar.hpp +++ b/src/include/flock/registry/scalar.hpp @@ -18,6 +18,7 @@ class ScalarRegistry { static void RegisterFusionCombMNZ(duckdb::ExtensionLoader& loader); static void RegisterFusionCombSUM(duckdb::ExtensionLoader& loader); static void RegisterFlockGetMetrics(duckdb::ExtensionLoader& loader); + static void RegisterFlockGetDebugMetrics(duckdb::ExtensionLoader& loader); static void RegisterFlockResetMetrics(duckdb::ExtensionLoader& loader); }; diff --git a/src/metrics/registry.cpp b/src/metrics/registry.cpp index 42d89908..bccaec0d 100644 --- a/src/metrics/registry.cpp +++ b/src/metrics/registry.cpp @@ -1,22 +1,36 @@ #include "flock/registry/registry.hpp" -#include "flock/metrics/metrics.hpp" +#include "flock/metrics/manager.hpp" namespace flock { void ScalarRegistry::RegisterFlockGetMetrics(duckdb::ExtensionLoader& loader) { - loader.RegisterFunction(duckdb::ScalarFunction( + auto function = duckdb::ScalarFunction( "flock_get_metrics", {}, duckdb::LogicalType::JSON(), - FlockMetrics::ExecuteGetMetrics)); + MetricsManager::ExecuteGetMetrics); + function.stability = duckdb::FunctionStability::VOLATILE; + loader.RegisterFunction(function); +} + +void ScalarRegistry::RegisterFlockGetDebugMetrics(duckdb::ExtensionLoader& loader) { + auto function = duckdb::ScalarFunction( + "flock_get_debug_metrics", + {}, + duckdb::LogicalType::JSON(), + MetricsManager::ExecuteGetDebugMetrics); + function.stability = duckdb::FunctionStability::VOLATILE; + loader.RegisterFunction(function); } void ScalarRegistry::RegisterFlockResetMetrics(duckdb::ExtensionLoader& loader) { - loader.RegisterFunction(duckdb::ScalarFunction( + auto function = duckdb::ScalarFunction( "flock_reset_metrics", {}, duckdb::LogicalType::VARCHAR, - FlockMetrics::ExecuteResetMetrics)); + MetricsManager::ExecuteResetMetrics); + function.stability = 
duckdb::FunctionStability::VOLATILE; + loader.RegisterFunction(function); } }// namespace flock diff --git a/src/registry/scalar.cpp b/src/registry/scalar.cpp index 0af1a009..1d7e181e 100644 --- a/src/registry/scalar.cpp +++ b/src/registry/scalar.cpp @@ -12,6 +12,7 @@ void ScalarRegistry::Register(duckdb::ExtensionLoader& loader) { RegisterFusionCombMNZ(loader); RegisterFusionCombSUM(loader); RegisterFlockGetMetrics(loader); + RegisterFlockGetDebugMetrics(loader); RegisterFlockResetMetrics(loader); } From 2965d81be700e2d0cefa2c55fd9892a86207cd3b Mon Sep 17 00:00:00 2001 From: Anas Dorbani Date: Sat, 6 Dec 2025 15:52:34 -0500 Subject: [PATCH 08/59] Updated handlers to use MetricsManager --- .../providers/handlers/base_handler.hpp | 9 +- .../providers/adapters/openai.cpp | 1 - test/unit/functions/scalar/metrics_test.cpp | 507 +++++++++++++++--- 3 files changed, 424 insertions(+), 93 deletions(-) diff --git a/src/include/flock/model_manager/providers/handlers/base_handler.hpp b/src/include/flock/model_manager/providers/handlers/base_handler.hpp index 9722fed2..27b89dc3 100644 --- a/src/include/flock/model_manager/providers/handlers/base_handler.hpp +++ b/src/include/flock/model_manager/providers/handlers/base_handler.hpp @@ -1,6 +1,6 @@ #pragma once -#include "flock/metrics/metrics.hpp" +#include "flock/metrics/manager.hpp" #include "flock/model_manager/providers/handlers/handler.hpp" #include "session.hpp" #include @@ -108,11 +108,10 @@ class BaseModelProviderHandler : public IModelProviderHandler { curl_easy_cleanup(requests[i].easy); } - auto& metrics = FlockMetrics::GetInstance(); - metrics.UpdateTokenUsage(batch_input_tokens, batch_output_tokens); - metrics.AddApiDuration(api_duration_ms); + MetricsManager::UpdateTokens(batch_input_tokens, batch_output_tokens); + MetricsManager::AddApiDuration(api_duration_ms); for (size_t i = 0; i < jsons.size(); ++i) { - metrics.IncrementApiCalls(); + MetricsManager::IncrementApiCalls(); } curl_slist_free_all(headers); diff 
--git a/src/model_manager/providers/adapters/openai.cpp b/src/model_manager/providers/adapters/openai.cpp index 2b1c97d2..de1a2c0c 100644 --- a/src/model_manager/providers/adapters/openai.cpp +++ b/src/model_manager/providers/adapters/openai.cpp @@ -4,7 +4,6 @@ namespace flock { void OpenAIProvider::AddCompletionRequest(const std::string& prompt, const int num_output_tuples, OutputType output_type, const nlohmann::json& media_data) { - auto message_content = nlohmann::json::array(); message_content.push_back({{"type", "text"}, {"text", prompt}}); diff --git a/test/unit/functions/scalar/metrics_test.cpp b/test/unit/functions/scalar/metrics_test.cpp index 4134bf03..a66e5637 100644 --- a/test/unit/functions/scalar/metrics_test.cpp +++ b/test/unit/functions/scalar/metrics_test.cpp @@ -1,5 +1,5 @@ #include "flock/core/config.hpp" -#include "flock/metrics/metrics.hpp" +#include "flock/metrics/manager.hpp" #include namespace flock { @@ -7,140 +7,473 @@ namespace flock { class MetricsTest : public ::testing::Test { protected: void SetUp() override { - FlockMetrics::GetInstance().Reset(); + auto con = Config::GetConnection(); + // Reset metrics before each test to ensure clean state + auto& manager = MetricsManager::GetForDatabase(GetDatabase()); + manager.Reset(); } - void TearDown() override { - FlockMetrics::GetInstance().Reset(); + duckdb::DatabaseInstance* GetDatabase() { + return Config::db; + } + + MetricsManager& GetMetricsManager() { + return MetricsManager::GetForDatabase(GetDatabase()); } }; TEST_F(MetricsTest, InitialMetricsAreZero) { - auto metrics = FlockMetrics::GetInstance().GetMetrics(); + auto& manager = GetMetricsManager(); + auto metrics = manager.GetMetrics(); + + EXPECT_TRUE(metrics.is_object()); + EXPECT_TRUE(metrics.empty()); +} - EXPECT_EQ(metrics["total_input_tokens"].get(), 0); - EXPECT_EQ(metrics["total_output_tokens"].get(), 0); - EXPECT_EQ(metrics["total_tokens"].get(), 0); - EXPECT_EQ(metrics["total_api_calls"].get(), 0); - 
EXPECT_DOUBLE_EQ(metrics["total_api_duration_ms"].get(), 0.0); - EXPECT_DOUBLE_EQ(metrics["total_execution_time_ms"].get(), 0.0); +TEST_F(MetricsTest, UpdateTokensForLlmComplete) { + auto* db = GetDatabase(); + const void* state_id = reinterpret_cast(0x1234); + + MetricsManager::StartInvocation(db, state_id, FunctionType::LLM_COMPLETE); + MetricsManager::UpdateTokens(100, 50); + + auto& manager = GetMetricsManager(); + auto metrics = manager.GetMetrics(); + + bool found = false; + for (const auto& [key, value]: metrics.items()) { + if (key.find("llm_complete_") == 0) { + EXPECT_EQ(value["input_tokens"].get(), 100); + EXPECT_EQ(value["output_tokens"].get(), 50); + EXPECT_EQ(value["total_tokens"].get(), 150); + found = true; + break; + } + } + EXPECT_TRUE(found); } -TEST_F(MetricsTest, UpdateTokenUsage) { - FlockMetrics::GetInstance().UpdateTokenUsage(100, 50); +TEST_F(MetricsTest, TracksDifferentFunctionsSeparately) { + auto* db = GetDatabase(); + const void* state_id1 = reinterpret_cast(0x1234); + const void* state_id2 = reinterpret_cast(0x5678); + + MetricsManager::StartInvocation(db, state_id1, FunctionType::LLM_COMPLETE); + MetricsManager::UpdateTokens(100, 50); + MetricsManager::AddExecutionTime(1000.0); + + MetricsManager::StartInvocation(db, state_id2, FunctionType::LLM_FILTER); + MetricsManager::UpdateTokens(200, 100); + MetricsManager::AddExecutionTime(2000.0); + + auto& manager = GetMetricsManager(); + auto metrics = manager.GetMetrics(); + + bool found_complete = false; + bool found_filter = false; + int64_t total_input = 0; + int64_t total_output = 0; + + for (const auto& [key, value]: metrics.items()) { + if (key.find("llm_complete_") == 0) { + EXPECT_EQ(value["input_tokens"].get(), 100); + total_input += value["input_tokens"].get(); + total_output += value["output_tokens"].get(); + found_complete = true; + } else if (key.find("llm_filter_") == 0) { + EXPECT_EQ(value["input_tokens"].get(), 200); + total_input += value["input_tokens"].get(); + 
total_output += value["output_tokens"].get(); + found_filter = true; + } + } - auto metrics = FlockMetrics::GetInstance().GetMetrics(); - EXPECT_EQ(metrics["total_input_tokens"].get(), 100); - EXPECT_EQ(metrics["total_output_tokens"].get(), 50); - EXPECT_EQ(metrics["total_tokens"].get(), 150); + EXPECT_TRUE(found_complete); + EXPECT_TRUE(found_filter); + EXPECT_EQ(total_input, 300); + EXPECT_EQ(total_output, 150); } TEST_F(MetricsTest, IncrementApiCalls) { - FlockMetrics::GetInstance().IncrementApiCalls(); - FlockMetrics::GetInstance().IncrementApiCalls(); - FlockMetrics::GetInstance().IncrementApiCalls(); + auto* db = GetDatabase(); + const void* state_id = reinterpret_cast(0x1234); + + MetricsManager::StartInvocation(db, state_id, FunctionType::LLM_COMPLETE); + MetricsManager::IncrementApiCalls(); + MetricsManager::IncrementApiCalls(); + + MetricsManager::StartInvocation(db, state_id, FunctionType::LLM_FILTER); + MetricsManager::IncrementApiCalls(); + + auto& manager = GetMetricsManager(); + auto metrics = manager.GetMetrics(); + + int64_t total_api_calls = 0; + int64_t complete_calls = 0; + int64_t filter_calls = 0; + + for (const auto& [key, value]: metrics.items()) { + if (key.find("llm_complete_") == 0) { + complete_calls = value["api_calls"].get(); + total_api_calls += complete_calls; + } else if (key.find("llm_filter_") == 0) { + filter_calls = value["api_calls"].get(); + total_api_calls += filter_calls; + } + } - auto metrics = FlockMetrics::GetInstance().GetMetrics(); - EXPECT_EQ(metrics["total_api_calls"].get(), 3); + EXPECT_EQ(total_api_calls, 3); + EXPECT_EQ(complete_calls, 2); + EXPECT_EQ(filter_calls, 1); } TEST_F(MetricsTest, AddApiDuration) { - FlockMetrics::GetInstance().AddApiDuration(100.5); - FlockMetrics::GetInstance().AddApiDuration(200.25); - - auto metrics = FlockMetrics::GetInstance().GetMetrics(); - EXPECT_DOUBLE_EQ(metrics["total_api_duration_ms"].get(), 300.75); + auto* db = GetDatabase(); + const void* state_id = 
reinterpret_cast(0x1234); + + MetricsManager::StartInvocation(db, state_id, FunctionType::LLM_COMPLETE); + MetricsManager::AddApiDuration(100.5); + MetricsManager::AddApiDuration(200.25); + + auto& manager = GetMetricsManager(); + auto metrics = manager.GetMetrics(); + + bool found = false; + for (const auto& [key, value]: metrics.items()) { + if (key.find("llm_complete_") == 0) { + EXPECT_NEAR(value["api_duration_ms"].get(), 300.75, 0.01); + found = true; + break; + } + } + EXPECT_TRUE(found); } TEST_F(MetricsTest, AddExecutionTime) { - FlockMetrics::GetInstance().AddExecutionTime(150.0); - FlockMetrics::GetInstance().AddExecutionTime(250.0); - - auto metrics = FlockMetrics::GetInstance().GetMetrics(); - EXPECT_DOUBLE_EQ(metrics["total_execution_time_ms"].get(), 400.0); + auto* db = GetDatabase(); + const void* state_id = reinterpret_cast(0x1234); + + MetricsManager::StartInvocation(db, state_id, FunctionType::LLM_COMPLETE); + MetricsManager::AddExecutionTime(150.0); + MetricsManager::AddExecutionTime(250.0); + + auto& manager = GetMetricsManager(); + auto metrics = manager.GetMetrics(); + + bool found = false; + for (const auto& [key, value]: metrics.items()) { + if (key.find("llm_complete_") == 0) { + EXPECT_NEAR(value["execution_time_ms"].get(), 400.0, 0.01); + found = true; + break; + } + } + EXPECT_TRUE(found); } -TEST_F(MetricsTest, AveragesCalculatedCorrectly) { - FlockMetrics::GetInstance().IncrementApiCalls(); - FlockMetrics::GetInstance().IncrementApiCalls(); - FlockMetrics::GetInstance().AddApiDuration(100.0); - FlockMetrics::GetInstance().AddApiDuration(200.0); - FlockMetrics::GetInstance().AddExecutionTime(150.0); - FlockMetrics::GetInstance().AddExecutionTime(250.0); +TEST_F(MetricsTest, ResetClearsAllMetrics) { + auto* db = GetDatabase(); + const void* state_id = reinterpret_cast(0x1234); + + MetricsManager::StartInvocation(db, state_id, FunctionType::LLM_COMPLETE); + MetricsManager::UpdateTokens(100, 50); + MetricsManager::IncrementApiCalls(); + 
MetricsManager::AddApiDuration(100.0); + MetricsManager::AddExecutionTime(150.0); + + auto& manager = GetMetricsManager(); + manager.Reset(); - auto metrics = FlockMetrics::GetInstance().GetMetrics(); - EXPECT_DOUBLE_EQ(metrics["avg_api_duration_ms"].get(), 150.0); - EXPECT_DOUBLE_EQ(metrics["avg_execution_time_ms"].get(), 200.0); + auto metrics = manager.GetMetrics(); + EXPECT_TRUE(metrics.is_object()); + EXPECT_TRUE(metrics.empty()); } -TEST_F(MetricsTest, AveragesZeroWhenNoApiCalls) { - FlockMetrics::GetInstance().AddApiDuration(100.0); - FlockMetrics::GetInstance().AddExecutionTime(150.0); +TEST_F(MetricsTest, SqlFunctionFlockGetMetrics) { + auto con = Config::GetConnection(); + auto results = con.Query("SELECT flock_get_metrics() AS metrics;"); + + ASSERT_FALSE(results->HasError()) << results->GetError(); + ASSERT_EQ(results->RowCount(), 1); + + auto json_str = results->GetValue(0, 0).GetValue(); + auto metrics = nlohmann::json::parse(json_str); - auto metrics = FlockMetrics::GetInstance().GetMetrics(); - EXPECT_DOUBLE_EQ(metrics["avg_api_duration_ms"].get(), 0.0); - EXPECT_DOUBLE_EQ(metrics["avg_execution_time_ms"].get(), 0.0); + EXPECT_TRUE(metrics.is_object()); } -TEST_F(MetricsTest, ResetClearsAllMetrics) { - FlockMetrics::GetInstance().UpdateTokenUsage(100, 50); - FlockMetrics::GetInstance().IncrementApiCalls(); - FlockMetrics::GetInstance().AddApiDuration(100.0); - FlockMetrics::GetInstance().AddExecutionTime(150.0); +TEST_F(MetricsTest, SqlFunctionFlockResetMetrics) { + auto con = Config::GetConnection(); + auto* db = GetDatabase(); + const void* state_id = reinterpret_cast(0x1234); + + MetricsManager::StartInvocation(db, state_id, FunctionType::LLM_COMPLETE); + MetricsManager::UpdateTokens(100, 50); + MetricsManager::IncrementApiCalls(); + + auto results = con.Query("SELECT flock_reset_metrics() AS result;"); - FlockMetrics::GetInstance().Reset(); + ASSERT_FALSE(results->HasError()) << results->GetError(); + ASSERT_EQ(results->RowCount(), 1); - auto 
metrics = FlockMetrics::GetInstance().GetMetrics(); - EXPECT_EQ(metrics["total_input_tokens"].get(), 0); - EXPECT_EQ(metrics["total_output_tokens"].get(), 0); - EXPECT_EQ(metrics["total_api_calls"].get(), 0); - EXPECT_DOUBLE_EQ(metrics["total_api_duration_ms"].get(), 0.0); - EXPECT_DOUBLE_EQ(metrics["total_execution_time_ms"].get(), 0.0); + auto& manager = GetMetricsManager(); + auto metrics = manager.GetMetrics(); + EXPECT_TRUE(metrics.is_object()); + EXPECT_TRUE(metrics.empty()); } -TEST_F(MetricsTest, AccumulatesMultipleUpdates) { - FlockMetrics::GetInstance().UpdateTokenUsage(100, 50); - FlockMetrics::GetInstance().UpdateTokenUsage(200, 100); - FlockMetrics::GetInstance().UpdateTokenUsage(50, 25); +TEST_F(MetricsTest, SequentialNumberingForMultipleCalls) { + auto* db = GetDatabase(); + const void* state_id1 = reinterpret_cast(0x1111); + const void* state_id2 = reinterpret_cast(0x2222); + const void* state_id3 = reinterpret_cast(0x3333); + + MetricsManager::StartInvocation(db, state_id1, FunctionType::LLM_FILTER); + MetricsManager::UpdateTokens(10, 5); + + MetricsManager::StartInvocation(db, state_id2, FunctionType::LLM_FILTER); + MetricsManager::UpdateTokens(20, 10); + + MetricsManager::StartInvocation(db, state_id3, FunctionType::LLM_FILTER); + MetricsManager::UpdateTokens(30, 15); + + auto& manager = GetMetricsManager(); + auto metrics = manager.GetMetrics(); + + bool found_1 = false, found_2 = false, found_3 = false; + for (const auto& [key, value]: metrics.items()) { + if (key == "llm_filter_1") { + EXPECT_EQ(value["input_tokens"].get(), 10); + found_1 = true; + } else if (key == "llm_filter_2") { + EXPECT_EQ(value["input_tokens"].get(), 20); + found_2 = true; + } else if (key == "llm_filter_3") { + EXPECT_EQ(value["input_tokens"].get(), 30); + found_3 = true; + } + } - auto metrics = FlockMetrics::GetInstance().GetMetrics(); - EXPECT_EQ(metrics["total_input_tokens"].get(), 350); - EXPECT_EQ(metrics["total_output_tokens"].get(), 175); - 
EXPECT_EQ(metrics["total_tokens"].get(), 525); + EXPECT_TRUE(found_1) << "llm_filter_1 not found"; + EXPECT_TRUE(found_2) << "llm_filter_2 not found"; + EXPECT_TRUE(found_3) << "llm_filter_3 not found"; } -TEST_F(MetricsTest, SqlFunctionFlockGetMetrics) { - FlockMetrics::GetInstance().UpdateTokenUsage(100, 50); - FlockMetrics::GetInstance().IncrementApiCalls(); +TEST_F(MetricsTest, DebugMetricsReturnsNestedStructure) { + auto* db = GetDatabase(); + const void* state_id1 = reinterpret_cast(0x1111); + const void* state_id2 = reinterpret_cast(0x2222); + + MetricsManager::StartInvocation(db, state_id1, FunctionType::LLM_COMPLETE); + MetricsManager::UpdateTokens(100, 50); + MetricsManager::SetModelInfo("gpt-4o", "openai"); + + MetricsManager::StartInvocation(db, state_id2, FunctionType::LLM_FILTER); + MetricsManager::UpdateTokens(200, 100); + MetricsManager::SetModelInfo("gpt-4o", "openai"); + + auto& manager = GetMetricsManager(); + auto debug_metrics = manager.GetDebugMetrics(); + + EXPECT_TRUE(debug_metrics.is_object()); + EXPECT_TRUE(debug_metrics.contains("threads")); + EXPECT_TRUE(debug_metrics.contains("thread_count")); + EXPECT_GE(debug_metrics["thread_count"].get(), 1); + + auto threads = debug_metrics["threads"]; + EXPECT_TRUE(threads.is_object()); +} + +TEST_F(MetricsTest, DebugMetricsContainsRegistrationOrder) { + auto* db = GetDatabase(); + const void* state_id = reinterpret_cast(0x1234); + + MetricsManager::StartInvocation(db, state_id, FunctionType::LLM_COMPLETE); + MetricsManager::UpdateTokens(100, 50); + + auto& manager = GetMetricsManager(); + auto debug_metrics = manager.GetDebugMetrics(); + + bool found_registration_order = false; + for (const auto& [thread_id, thread_data]: debug_metrics["threads"].items()) { + for (const auto& [state_id_str, state_data]: thread_data.items()) { + if (state_data.contains("llm_complete")) { + EXPECT_TRUE(state_data["llm_complete"].contains("registration_order")); + 
EXPECT_GT(state_data["llm_complete"]["registration_order"].get(), 0); + found_registration_order = true; + } + } + } + EXPECT_TRUE(found_registration_order); +} +TEST_F(MetricsTest, SqlFunctionFlockGetDebugMetrics) { auto con = Config::GetConnection(); - auto results = con.Query("SELECT flock_get_metrics() AS metrics;"); + auto* db = GetDatabase(); + const void* state_id = reinterpret_cast(0x1234); + + MetricsManager::StartInvocation(db, state_id, FunctionType::LLM_COMPLETE); + MetricsManager::UpdateTokens(100, 50); + + auto results = con.Query("SELECT flock_get_debug_metrics() AS debug_metrics;"); ASSERT_FALSE(results->HasError()) << results->GetError(); ASSERT_EQ(results->RowCount(), 1); auto json_str = results->GetValue(0, 0).GetValue(); - auto metrics = nlohmann::json::parse(json_str); + auto debug_metrics = nlohmann::json::parse(json_str); - EXPECT_EQ(metrics["total_input_tokens"].get(), 100); - EXPECT_EQ(metrics["total_output_tokens"].get(), 50); - EXPECT_EQ(metrics["total_api_calls"].get(), 1); + EXPECT_TRUE(debug_metrics.is_object()); + EXPECT_TRUE(debug_metrics.contains("threads")); + EXPECT_TRUE(debug_metrics.contains("thread_count")); } -TEST_F(MetricsTest, SqlFunctionFlockResetMetrics) { - FlockMetrics::GetInstance().UpdateTokenUsage(100, 50); - FlockMetrics::GetInstance().IncrementApiCalls(); +TEST_F(MetricsTest, AggregateFunctionMetricsTracking) { + auto* db = GetDatabase(); + const void* state_id = reinterpret_cast(0xAAAA); + + MetricsManager::StartInvocation(db, state_id, FunctionType::LLM_REDUCE); + MetricsManager::UpdateTokens(500, 200); + MetricsManager::SetModelInfo("gpt-4o", "openai"); + MetricsManager::IncrementApiCalls(); + MetricsManager::AddApiDuration(2000.0); + MetricsManager::AddExecutionTime(2500.0); + + auto& manager = GetMetricsManager(); + auto metrics = manager.GetMetrics(); + + bool found = false; + for (const auto& [key, value]: metrics.items()) { + if (key.find("llm_reduce_") == 0) { + EXPECT_EQ(value["input_tokens"].get(), 500); 
+ EXPECT_EQ(value["output_tokens"].get(), 200); + EXPECT_EQ(value["total_tokens"].get(), 700); + EXPECT_EQ(value["api_calls"].get(), 1); + EXPECT_NEAR(value["api_duration_ms"].get(), 2000.0, 0.01); + EXPECT_NEAR(value["execution_time_ms"].get(), 2500.0, 0.01); + EXPECT_EQ(value["model_name"].get(), "gpt-4o"); + EXPECT_EQ(value["provider"].get(), "openai"); + found = true; + break; + } + } + EXPECT_TRUE(found); +} - auto con = Config::GetConnection(); - auto results = con.Query("SELECT flock_reset_metrics() AS result;"); +TEST_F(MetricsTest, MultipleAggregateFunctionsSequentialNumbering) { + auto* db = GetDatabase(); + const void* state_id1 = reinterpret_cast(0xBBBB); + const void* state_id2 = reinterpret_cast(0xCCCC); + + MetricsManager::StartInvocation(db, state_id1, FunctionType::LLM_REDUCE); + MetricsManager::UpdateTokens(100, 50); + + MetricsManager::StartInvocation(db, state_id2, FunctionType::LLM_REDUCE); + MetricsManager::UpdateTokens(200, 100); + + auto& manager = GetMetricsManager(); + auto metrics = manager.GetMetrics(); + + bool found_1 = false, found_2 = false; + for (const auto& [key, value]: metrics.items()) { + if (key == "llm_reduce_1") { + EXPECT_EQ(value["input_tokens"].get(), 100); + found_1 = true; + } else if (key == "llm_reduce_2") { + EXPECT_EQ(value["input_tokens"].get(), 200); + found_2 = true; + } + } - ASSERT_FALSE(results->HasError()) << results->GetError(); - ASSERT_EQ(results->RowCount(), 1); + EXPECT_TRUE(found_1) << "llm_reduce_1 not found"; + EXPECT_TRUE(found_2) << "llm_reduce_2 not found"; +} - auto metrics = FlockMetrics::GetInstance().GetMetrics(); - EXPECT_EQ(metrics["total_input_tokens"].get(), 0); - EXPECT_EQ(metrics["total_output_tokens"].get(), 0); - EXPECT_EQ(metrics["total_api_calls"].get(), 0); +TEST_F(MetricsTest, AggregateFunctionDebugMetrics) { + auto* db = GetDatabase(); + const void* state_id = reinterpret_cast(0xDDDD); + + MetricsManager::StartInvocation(db, state_id, FunctionType::LLM_RERANK); + 
MetricsManager::UpdateTokens(300, 150); + MetricsManager::SetModelInfo("gpt-4o", "openai"); + + auto& manager = GetMetricsManager(); + auto debug_metrics = manager.GetDebugMetrics(); + + bool found_rerank = false; + for (const auto& [thread_id, thread_data]: debug_metrics["threads"].items()) { + for (const auto& [state_id_str, state_data]: thread_data.items()) { + if (state_data.contains("llm_rerank")) { + EXPECT_EQ(state_data["llm_rerank"]["input_tokens"].get(), 300); + EXPECT_EQ(state_data["llm_rerank"]["output_tokens"].get(), 150); + EXPECT_TRUE(state_data["llm_rerank"].contains("registration_order")); + found_rerank = true; + } + } + } + EXPECT_TRUE(found_rerank); +} + +TEST_F(MetricsTest, MixedScalarAndAggregateMetrics) { + auto* db = GetDatabase(); + const void* scalar_state = reinterpret_cast(0xEEEE); + const void* aggregate_state = reinterpret_cast(0xFFFF); + + MetricsManager::StartInvocation(db, scalar_state, FunctionType::LLM_COMPLETE); + MetricsManager::UpdateTokens(100, 50); + + MetricsManager::StartInvocation(db, aggregate_state, FunctionType::LLM_REDUCE); + MetricsManager::UpdateTokens(200, 100); + + auto& manager = GetMetricsManager(); + auto metrics = manager.GetMetrics(); + + bool found_complete = false, found_reduce = false; + for (const auto& [key, value]: metrics.items()) { + if (key.find("llm_complete_") == 0) { + EXPECT_EQ(value["input_tokens"].get(), 100); + found_complete = true; + } else if (key.find("llm_reduce_") == 0) { + EXPECT_EQ(value["input_tokens"].get(), 200); + found_reduce = true; + } + } + + EXPECT_TRUE(found_complete); + EXPECT_TRUE(found_reduce); +} + +TEST_F(MetricsTest, EmbeddingMetricsTracking) { + auto* db = GetDatabase(); + const void* state_id = reinterpret_cast(0xABCD); + + MetricsManager::StartInvocation(db, state_id, FunctionType::LLM_EMBEDDING); + MetricsManager::SetModelInfo("text-embedding-3-small", "openai"); + // For embeddings, typically only input tokens are used (no output tokens) + 
MetricsManager::UpdateTokens(150, 0); + MetricsManager::IncrementApiCalls(); + MetricsManager::AddApiDuration(250.0); + MetricsManager::AddExecutionTime(300.0); + + auto& manager = GetMetricsManager(); + auto metrics = manager.GetMetrics(); + + bool found = false; + for (const auto& [key, value]: metrics.items()) { + if (key.find("llm_embedding_") == 0) { + EXPECT_EQ(value["input_tokens"].get(), 150); + EXPECT_EQ(value["output_tokens"].get(), 0); + EXPECT_EQ(value["total_tokens"].get(), 150); + EXPECT_EQ(value["api_calls"].get(), 1); + EXPECT_NEAR(value["api_duration_ms"].get(), 250.0, 0.01); + EXPECT_NEAR(value["execution_time_ms"].get(), 300.0, 0.01); + EXPECT_EQ(value["model_name"].get(), "text-embedding-3-small"); + EXPECT_EQ(value["provider"].get(), "openai"); + found = true; + break; + } + } + EXPECT_TRUE(found); } }// namespace flock From 19ea07b1110f7d8ce4b8c2b3d3c7b0d8a3ea7a00 Mon Sep 17 00:00:00 2001 From: Anas Dorbani Date: Sat, 6 Dec 2025 15:53:06 -0500 Subject: [PATCH 09/59] Merged scalar and aggregate metrics tests --- .../tests/functions/scalar/test_metrics.py | 91 ------------------- 1 file changed, 91 deletions(-) delete mode 100644 test/integration/src/integration/tests/functions/scalar/test_metrics.py diff --git a/test/integration/src/integration/tests/functions/scalar/test_metrics.py b/test/integration/src/integration/tests/functions/scalar/test_metrics.py deleted file mode 100644 index c42101b0..00000000 --- a/test/integration/src/integration/tests/functions/scalar/test_metrics.py +++ /dev/null @@ -1,91 +0,0 @@ -import pytest -from integration.conftest import run_cli - - -@pytest.fixture(params=[("gpt-4o-mini", "openai"), ("llama3.2", "ollama")]) -def model_config(request): - return request.param - - -def test_flock_get_metrics_returns_json(integration_setup): - duckdb_cli_path, db_path = integration_setup - query = "SELECT flock_get_metrics() AS metrics;" - result = run_cli(duckdb_cli_path, db_path, query) - - assert result.returncode == 0, 
f"Query failed with error: {result.stderr}" - assert "metrics" in result.stdout.lower() - assert "total_input_tokens" in result.stdout.lower() - assert "total_output_tokens" in result.stdout.lower() - assert "total_api_calls" in result.stdout.lower() - - -def test_flock_reset_metrics(integration_setup): - duckdb_cli_path, db_path = integration_setup - query = "SELECT flock_reset_metrics() AS result;" - result = run_cli(duckdb_cli_path, db_path, query) - - assert result.returncode == 0, f"Query failed with error: {result.stderr}" - assert "reset" in result.stdout.lower() - - -def test_metrics_after_llm_complete(integration_setup, model_config): - duckdb_cli_path, db_path = integration_setup - model_name, provider = model_config - - run_cli(duckdb_cli_path, db_path, "SELECT flock_reset_metrics();") - - test_model_name = f"test-metrics-model_{model_name}" - create_model_query = ( - f"CREATE MODEL('{test_model_name}', '{model_name}', '{provider}');" - ) - run_cli(duckdb_cli_path, db_path, create_model_query) - - query = ( - """ - SELECT llm_complete( - {'model_name': '""" - + test_model_name - + """'}, - {'prompt': 'What is 2+2?'} - ) AS result; - """ - ) - run_cli(duckdb_cli_path, db_path, query) - - metrics_query = "SELECT flock_get_metrics() AS metrics;" - metrics_result = run_cli(duckdb_cli_path, db_path, metrics_query) - - assert metrics_result.returncode == 0 - assert "total_api_calls" in metrics_result.stdout.lower() - - -def test_metrics_reset_clears_counters(integration_setup, model_config): - duckdb_cli_path, db_path = integration_setup - model_name, provider = model_config - - test_model_name = f"test-reset-model_{model_name}" - create_model_query = ( - f"CREATE MODEL('{test_model_name}', '{model_name}', '{provider}');" - ) - run_cli(duckdb_cli_path, db_path, create_model_query) - - query = ( - """ - SELECT llm_complete( - {'model_name': '""" - + test_model_name - + """'}, - {'prompt': 'Say hello'} - ) AS result; - """ - ) - run_cli(duckdb_cli_path, db_path, 
query) - - run_cli(duckdb_cli_path, db_path, "SELECT flock_reset_metrics();") - - metrics_query = "SELECT flock_get_metrics() AS metrics;" - metrics_result = run_cli(duckdb_cli_path, db_path, metrics_query) - - assert metrics_result.returncode == 0 - output = metrics_result.stdout.lower() - assert "total_api_calls" in output and ":0" in output.replace(" ", "") From 12c2605750fad44995afb32a94c8655542f9e429 Mon Sep 17 00:00:00 2001 From: Anas Dorbani Date: Sat, 6 Dec 2025 15:53:13 -0500 Subject: [PATCH 10/59] Updated metrics CMakeLists --- src/metrics/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/metrics/CMakeLists.txt b/src/metrics/CMakeLists.txt index 4c3059d1..a35c542e 100644 --- a/src/metrics/CMakeLists.txt +++ b/src/metrics/CMakeLists.txt @@ -1,4 +1,4 @@ set(EXTENSION_SOURCES - ${EXTENSION_SOURCES} ${CMAKE_CURRENT_SOURCE_DIR}/implementation.cpp + ${EXTENSION_SOURCES} ${CMAKE_CURRENT_SOURCE_DIR}/metrics.cpp ${CMAKE_CURRENT_SOURCE_DIR}/registry.cpp PARENT_SCOPE) From 7934bad04770b9421e66caab08c3d5dd8281f159 Mon Sep 17 00:00:00 2001 From: Anas Dorbani Date: Sat, 6 Dec 2025 15:55:26 -0500 Subject: [PATCH 11/59] Added merged metrics integration tests --- .../integration/tests/metrics/test_metrics.py | 724 ++++++++++++++++++ 1 file changed, 724 insertions(+) create mode 100644 test/integration/src/integration/tests/metrics/test_metrics.py diff --git a/test/integration/src/integration/tests/metrics/test_metrics.py b/test/integration/src/integration/tests/metrics/test_metrics.py new file mode 100644 index 00000000..98113070 --- /dev/null +++ b/test/integration/src/integration/tests/metrics/test_metrics.py @@ -0,0 +1,724 @@ +import pytest +import json +import csv +from io import StringIO +from integration.conftest import run_cli + + +def get_json_from_csv_output(stdout, column_name="metrics"): + """Extract JSON value from DuckDB CSV output""" + reader = csv.DictReader(StringIO(stdout)) + row = next(reader, None) + if row and 
column_name in row: + return json.loads(row[column_name]) + return None + + +@pytest.fixture(params=[("gpt-4o-mini", "openai"), ("llama3.2", "ollama")]) +def model_config(request): + return request.param + + +# ============================================================================ +# Basic Metrics API Tests +# ============================================================================ + + +def test_flock_get_metrics_returns_json(integration_setup): + duckdb_cli_path, db_path = integration_setup + query = "SELECT flock_get_metrics() AS metrics;" + result = run_cli(duckdb_cli_path, db_path, query) + + assert result.returncode == 0, f"Query failed with error: {result.stderr}" + + metrics = get_json_from_csv_output(result.stdout) + assert metrics is not None, "No JSON found in output" + + # Check new structure - should be a flat object + assert isinstance(metrics, dict) + assert len(metrics) == 0 # Initially empty + + +def test_flock_reset_metrics(integration_setup): + duckdb_cli_path, db_path = integration_setup + query = "SELECT flock_reset_metrics() AS result;" + result = run_cli(duckdb_cli_path, db_path, query) + + assert result.returncode == 0, f"Query failed with error: {result.stderr}" + assert "reset" in result.stdout.lower() + + +# ============================================================================ +# Scalar Function Metrics Tests +# ============================================================================ + + +def test_metrics_after_llm_complete(integration_setup, model_config): + duckdb_cli_path, db_path = integration_setup + model_name, provider = model_config + + run_cli(duckdb_cli_path, db_path, "SELECT flock_reset_metrics();") + + test_model_name = f"test-metrics-model_{model_name}" + create_model_query = ( + f"CREATE MODEL('{test_model_name}', '{model_name}', '{provider}');" + ) + run_cli(duckdb_cli_path, db_path, create_model_query) + + # Call llm_complete and get_metrics in the same query + query = ( + """ + SELECT + llm_complete( + 
{'model_name': '""" + + test_model_name + + """'}, + {'prompt': 'What is 2+2?'} + ) AS result, + flock_get_metrics() AS metrics; + """ + ) + result = run_cli(duckdb_cli_path, db_path, query) + + assert result.returncode == 0, f"Query failed with error: {result.stderr}" + + # Parse CSV output to get metrics + reader = csv.DictReader(StringIO(result.stdout)) + row = next(reader, None) + assert row is not None, "No data returned from query" + assert "metrics" in row, "Metrics column not found in output" + + metrics = json.loads(row["metrics"]) + + # Check that metrics were recorded - should be a flat object with keys like "llm_complete_1" + assert isinstance(metrics, dict) + assert len(metrics) > 0 + + # Check that we have llm_complete_1 with proper structure + assert "llm_complete_1" in metrics, ( + f"Expected llm_complete_1 in metrics, got: {list(metrics.keys())}" + ) + llm_complete_1 = metrics["llm_complete_1"] + + assert "api_calls" in llm_complete_1 + assert llm_complete_1["api_calls"] > 0 + assert "input_tokens" in llm_complete_1 + assert "output_tokens" in llm_complete_1 + assert "total_tokens" in llm_complete_1 + assert "api_duration_ms" in llm_complete_1 + assert "execution_time_ms" in llm_complete_1 + assert "model_name" in llm_complete_1 + assert llm_complete_1["model_name"] == test_model_name + assert "provider" in llm_complete_1 + assert llm_complete_1["provider"] == provider + + +def test_metrics_reset_clears_counters(integration_setup, model_config): + duckdb_cli_path, db_path = integration_setup + model_name, provider = model_config + + test_model_name = f"test-reset-model_{model_name}" + create_model_query = ( + f"CREATE MODEL('{test_model_name}', '{model_name}', '{provider}');" + ) + run_cli(duckdb_cli_path, db_path, create_model_query) + + # First query: execute llm_complete and get metrics in the same query + query1 = ( + """ + SELECT + llm_complete( + {'model_name': '""" + + test_model_name + + """'}, + {'prompt': 'Say hello'} + ) AS result, + 
flock_get_metrics() AS metrics; + """ + ) + result1 = run_cli(duckdb_cli_path, db_path, query1) + assert result1.returncode == 0 + + # Parse metrics from first query to verify they exist + reader1 = csv.DictReader(StringIO(result1.stdout)) + row1 = next(reader1, None) + assert row1 is not None and "metrics" in row1 + metrics1 = json.loads(row1["metrics"]) + assert len(metrics1) > 0, "Metrics should be recorded before reset" + assert "llm_complete_1" in metrics1, "Should have llm_complete_1 after first call" + + # Second query: reset metrics and get metrics in the same query + query2 = ( + "SELECT flock_reset_metrics() AS reset_result, flock_get_metrics() AS metrics;" + ) + result2 = run_cli(duckdb_cli_path, db_path, query2) + assert result2.returncode == 0 + + # Parse metrics from second query to verify they're cleared + reader2 = csv.DictReader(StringIO(result2.stdout)) + row2 = next(reader2, None) + assert row2 is not None and "metrics" in row2 + metrics2 = json.loads(row2["metrics"]) + + # After reset, should be empty + assert isinstance(metrics2, dict) + assert len(metrics2) == 0 + + +def test_sequential_numbering_multiple_calls(integration_setup, model_config): + """Test that multiple calls of the same function get sequential numbering""" + duckdb_cli_path, db_path = integration_setup + model_name, provider = model_config + + run_cli(duckdb_cli_path, db_path, "SELECT flock_reset_metrics();") + + test_model_name = f"test-sequential-model_{model_name}" + create_model_query = ( + f"CREATE MODEL('{test_model_name}', '{model_name}', '{provider}');" + ) + run_cli(duckdb_cli_path, db_path, create_model_query) + + # Make three calls to llm_complete in the same query + query = ( + """ + SELECT + llm_complete( + {'model_name': '""" + + test_model_name + + """'}, + {'prompt': 'First call'} + ) AS result1, + llm_complete( + {'model_name': '""" + + test_model_name + + """'}, + {'prompt': 'Second call'} + ) AS result2, + llm_complete( + {'model_name': '""" + + 
test_model_name + + """'}, + {'prompt': 'Third call'} + ) AS result3, + flock_get_metrics() AS metrics; + """ + ) + result = run_cli(duckdb_cli_path, db_path, query) + + assert result.returncode == 0, f"Query failed with error: {result.stderr}" + + # Parse CSV output to get metrics + reader = csv.DictReader(StringIO(result.stdout)) + row = next(reader, None) + assert row is not None, "No data returned from query" + assert "metrics" in row, "Metrics column not found in output" + + metrics = json.loads(row["metrics"]) + + # Should have llm_complete_1, llm_complete_2, llm_complete_3 + assert isinstance(metrics, dict) + assert len(metrics) >= 3, ( + f"Expected at least 3 metrics, got {len(metrics)}: {list(metrics.keys())}" + ) + + # Check that we have sequential numbering + found_keys = [key for key in metrics.keys() if key.startswith("llm_complete_")] + assert len(found_keys) >= 3, ( + f"Expected at least 3 llm_complete entries, got: {found_keys}" + ) + + # Verify each has the expected structure + for key in found_keys: + assert "api_calls" in metrics[key] + assert "input_tokens" in metrics[key] + assert "output_tokens" in metrics[key] + assert metrics[key]["api_calls"] == 1 + + +# ============================================================================ +# Debug Metrics Tests +# ============================================================================ + + +def test_flock_get_debug_metrics_returns_nested_structure( + integration_setup, model_config +): + """Test that flock_get_debug_metrics returns the nested structure""" + duckdb_cli_path, db_path = integration_setup + model_name, provider = model_config + + run_cli(duckdb_cli_path, db_path, "SELECT flock_reset_metrics();") + + test_model_name = f"test-debug-metrics-model_{model_name}" + create_model_query = ( + f"CREATE MODEL('{test_model_name}', '{model_name}', '{provider}');" + ) + run_cli(duckdb_cli_path, db_path, create_model_query) + + # Call llm_complete and get debug metrics + query = ( + """ + SELECT + 
llm_complete( + {'model_name': '""" + + test_model_name + + """'}, + {'prompt': 'What is 2+2?'} + ) AS result, + flock_get_debug_metrics() AS debug_metrics; + """ + ) + result = run_cli(duckdb_cli_path, db_path, query) + + assert result.returncode == 0, f"Query failed with error: {result.stderr}" + + # Parse CSV output to get debug metrics + reader = csv.DictReader(StringIO(result.stdout)) + row = next(reader, None) + assert row is not None, "No data returned from query" + assert "debug_metrics" in row, "Debug metrics column not found in output" + + debug_metrics = json.loads(row["debug_metrics"]) + + # Check nested structure + assert isinstance(debug_metrics, dict) + assert "threads" in debug_metrics + assert "thread_count" in debug_metrics + assert isinstance(debug_metrics["threads"], dict) + assert debug_metrics["thread_count"] > 0 + + # Check that threads contain state data + found_llm_complete = False + for thread_id, thread_data in debug_metrics["threads"].items(): + assert isinstance(thread_data, dict) + for state_id, state_data in thread_data.items(): + assert isinstance(state_data, dict) + if "llm_complete" in state_data: + llm_complete_data = state_data["llm_complete"] + assert "registration_order" in llm_complete_data + assert "api_calls" in llm_complete_data + assert "input_tokens" in llm_complete_data + assert "output_tokens" in llm_complete_data + found_llm_complete = True + + assert found_llm_complete, "llm_complete not found in debug metrics" + + +def test_debug_metrics_registration_order(integration_setup, model_config): + """Test that debug metrics include registration_order""" + duckdb_cli_path, db_path = integration_setup + model_name, provider = model_config + + run_cli(duckdb_cli_path, db_path, "SELECT flock_reset_metrics();") + + test_model_name = f"test-reg-order-model_{model_name}" + create_model_query = ( + f"CREATE MODEL('{test_model_name}', '{model_name}', '{provider}');" + ) + run_cli(duckdb_cli_path, db_path, create_model_query) + + # 
Make multiple calls + query = ( + """ + SELECT + llm_complete( + {'model_name': '""" + + test_model_name + + """'}, + {'prompt': 'First'} + ) AS result1, + llm_complete( + {'model_name': '""" + + test_model_name + + """'}, + {'prompt': 'Second'} + ) AS result2, + flock_get_debug_metrics() AS debug_metrics; + """ + ) + result = run_cli(duckdb_cli_path, db_path, query) + + assert result.returncode == 0 + + reader = csv.DictReader(StringIO(result.stdout)) + row = next(reader, None) + assert row is not None and "debug_metrics" in row + + debug_metrics = json.loads(row["debug_metrics"]) + + # Check registration orders + registration_orders = [] + for thread_id, thread_data in debug_metrics["threads"].items(): + for state_id, state_data in thread_data.items(): + if "llm_complete" in state_data: + reg_order = state_data["llm_complete"]["registration_order"] + registration_orders.append(reg_order) + + # Should have at least one registration order + assert len(registration_orders) > 0 + # Registration orders should be positive integers + for order in registration_orders: + assert isinstance(order, int) + assert order > 0 + + +# ============================================================================ +# Aggregate Function Metrics Tests +# ============================================================================ + + +def test_aggregate_function_metrics_tracking(integration_setup, model_config): + """Test that aggregate functions track metrics correctly""" + duckdb_cli_path, db_path = integration_setup + model_name, provider = model_config + + run_cli(duckdb_cli_path, db_path, "SELECT flock_reset_metrics();") + + test_model_name = f"test-aggregate-metrics-model_{model_name}" + create_model_query = ( + f"CREATE MODEL('{test_model_name}', '{model_name}', '{provider}');" + ) + run_cli(duckdb_cli_path, db_path, create_model_query) + + # Call llm_reduce and get metrics + query = ( + """ + SELECT + category, + llm_reduce( + {'model_name': '""" + + test_model_name + + """'}, + 
{'prompt': 'Summarize', 'context_columns': [{'data': description}]} + ) AS summary, + flock_get_metrics() AS metrics + FROM VALUES + ('Electronics', 'High-performance laptop'), + ('Electronics', 'Latest smartphone') + AS t(category, description) + GROUP BY category; + """ + ) + result = run_cli(duckdb_cli_path, db_path, query) + + assert result.returncode == 0, f"Query failed with error: {result.stderr}" + + # Parse CSV output + reader = csv.DictReader(StringIO(result.stdout)) + row = next(reader, None) + assert row is not None, "No data returned from query" + assert "metrics" in row, "Metrics column not found" + + metrics = json.loads(row["metrics"]) + + # Check that metrics were recorded + assert isinstance(metrics, dict) + assert len(metrics) > 0 + + # Check for llm_reduce metrics + found_reduce = False + for key in metrics.keys(): + if key.startswith("llm_reduce_"): + reduce_metrics = metrics[key] + assert "api_calls" in reduce_metrics + assert "input_tokens" in reduce_metrics + assert "output_tokens" in reduce_metrics + assert "total_tokens" in reduce_metrics + assert "api_duration_ms" in reduce_metrics + assert "execution_time_ms" in reduce_metrics + assert "model_name" in reduce_metrics + assert reduce_metrics["model_name"] == test_model_name + assert "provider" in reduce_metrics + assert reduce_metrics["provider"] == provider + found_reduce = True + break + + assert found_reduce, f"llm_reduce metrics not found in: {list(metrics.keys())}" + + +def test_multiple_aggregate_functions_sequential_numbering( + integration_setup, model_config +): + """Test that multiple aggregate function calls get sequential numbering""" + duckdb_cli_path, db_path = integration_setup + model_name, provider = model_config + + run_cli(duckdb_cli_path, db_path, "SELECT flock_reset_metrics();") + + test_model_name = f"test-sequential-aggregate-model_{model_name}" + create_model_query = ( + f"CREATE MODEL('{test_model_name}', '{model_name}', '{provider}');" + ) + 
run_cli(duckdb_cli_path, db_path, create_model_query) + + # Call llm_reduce twice in the same query + query = ( + """ + SELECT + category, + llm_reduce( + {'model_name': '""" + + test_model_name + + """'}, + {'prompt': 'First prompt', 'context_columns': [{'data': description}]} + ) AS summary1, + llm_reduce( + {'model_name': '""" + + test_model_name + + """'}, + {'prompt': 'Second prompt', 'context_columns': [{'data': description}]} + ) AS summary2, + flock_get_metrics() AS metrics + FROM VALUES + ('Electronics', 'High-performance laptop') + AS t(category, description) + GROUP BY category; + """ + ) + result = run_cli(duckdb_cli_path, db_path, query) + + assert result.returncode == 0, f"Query failed with error: {result.stderr}" + + reader = csv.DictReader(StringIO(result.stdout)) + row = next(reader, None) + assert row is not None and "metrics" in row + + metrics = json.loads(row["metrics"]) + + # Should have llm_reduce_1 and llm_reduce_2 + found_keys = [key for key in metrics.keys() if key.startswith("llm_reduce_")] + assert len(found_keys) >= 2, ( + f"Expected at least 2 llm_reduce entries, got: {found_keys}" + ) + + # Verify sequential numbering + numbers = [] + for key in found_keys: + # Extract number from key like "llm_reduce_1" + num = int(key.split("_")[-1]) + numbers.append(num) + + numbers.sort() + # Should have sequential numbers starting from 1 + assert numbers[0] == 1, f"First number should be 1, got {numbers}" + + +def test_aggregate_function_debug_metrics(integration_setup, model_config): + """Test debug metrics for aggregate functions""" + duckdb_cli_path, db_path = integration_setup + model_name, provider = model_config + + run_cli(duckdb_cli_path, db_path, "SELECT flock_reset_metrics();") + + test_model_name = f"test-debug-aggregate-model_{model_name}" + create_model_query = ( + f"CREATE MODEL('{test_model_name}', '{model_name}', '{provider}');" + ) + run_cli(duckdb_cli_path, db_path, create_model_query) + + query = ( + """ + SELECT + category, + 
llm_reduce( + {'model_name': '""" + + test_model_name + + """'}, + {'prompt': 'Summarize', 'context_columns': [{'data': description}]} + ) AS summary, + flock_get_debug_metrics() AS debug_metrics + FROM VALUES + ('Electronics', 'High-performance laptop') + AS t(category, description) + GROUP BY category; + """ + ) + result = run_cli(duckdb_cli_path, db_path, query) + + assert result.returncode == 0 + + reader = csv.DictReader(StringIO(result.stdout)) + row = next(reader, None) + assert row is not None and "debug_metrics" in row + + debug_metrics = json.loads(row["debug_metrics"]) + + # Check nested structure + assert isinstance(debug_metrics, dict) + assert "threads" in debug_metrics + assert "thread_count" in debug_metrics + + # Check that llm_reduce appears in debug metrics + found_llm_reduce = False + for thread_id, thread_data in debug_metrics["threads"].items(): + for state_id, state_data in thread_data.items(): + if "llm_reduce" in state_data: + reduce_data = state_data["llm_reduce"] + assert "registration_order" in reduce_data + assert "api_calls" in reduce_data + assert "input_tokens" in reduce_data + assert "output_tokens" in reduce_data + found_llm_reduce = True + + assert found_llm_reduce, "llm_reduce not found in debug metrics" + + +def test_llm_rerank_metrics(integration_setup, model_config): + """Test metrics for llm_rerank aggregate function""" + duckdb_cli_path, db_path = integration_setup + model_name, provider = model_config + + run_cli(duckdb_cli_path, db_path, "SELECT flock_reset_metrics();") + + test_model_name = f"test-rerank-metrics-model_{model_name}" + create_model_query = ( + f"CREATE MODEL('{test_model_name}', '{model_name}', '{provider}');" + ) + run_cli(duckdb_cli_path, db_path, create_model_query) + + query = ( + """ + SELECT + llm_rerank( + {'model_name': '""" + + test_model_name + + """'}, + {'prompt': 'Rank these', 'context_columns': [{'data': description}]} + ) AS ranked, + flock_get_metrics() AS metrics + FROM VALUES + ('Product 
1'), + ('Product 2'), + ('Product 3') + AS t(description); + """ + ) + result = run_cli(duckdb_cli_path, db_path, query) + + assert result.returncode == 0 + + reader = csv.DictReader(StringIO(result.stdout)) + row = next(reader, None) + assert row is not None and "metrics" in row + + metrics = json.loads(row["metrics"]) + + # Check for llm_rerank metrics + found_rerank = False + for key in metrics.keys(): + if key.startswith("llm_rerank_"): + rerank_metrics = metrics[key] + assert "api_calls" in rerank_metrics + assert "input_tokens" in rerank_metrics + assert "output_tokens" in rerank_metrics + found_rerank = True + break + + assert found_rerank, f"llm_rerank metrics not found in: {list(metrics.keys())}" + + +def test_llm_first_metrics(integration_setup, model_config): + """Test metrics for llm_first aggregate function""" + duckdb_cli_path, db_path = integration_setup + model_name, provider = model_config + + run_cli(duckdb_cli_path, db_path, "SELECT flock_reset_metrics();") + + test_model_name = f"test-first-metrics-model_{model_name}" + create_model_query = ( + f"CREATE MODEL('{test_model_name}', '{model_name}', '{provider}');" + ) + run_cli(duckdb_cli_path, db_path, create_model_query) + + query = ( + """ + SELECT + category, + llm_first( + {'model_name': '""" + + test_model_name + + """'}, + {'prompt': 'Select first', 'context_columns': [{'data': description}]} + ) AS first_item, + flock_get_metrics() AS metrics + FROM VALUES + ('Electronics', 'Product 1'), + ('Electronics', 'Product 2') + AS t(category, description) + GROUP BY category; + """ + ) + result = run_cli(duckdb_cli_path, db_path, query) + + assert result.returncode == 0 + + reader = csv.DictReader(StringIO(result.stdout)) + row = next(reader, None) + assert row is not None and "metrics" in row + + metrics = json.loads(row["metrics"]) + + # Check for llm_first metrics + found_first = False + for key in metrics.keys(): + if key.startswith("llm_first_"): + first_metrics = metrics[key] + assert 
"api_calls" in first_metrics + found_first = True + break + + assert found_first, f"llm_first metrics not found in: {list(metrics.keys())}" + + +# ============================================================================ +# Mixed Scalar and Aggregate Tests +# ============================================================================ + + +def test_mixed_scalar_and_aggregate_metrics(integration_setup, model_config): + """Test that both scalar and aggregate functions are tracked separately""" + duckdb_cli_path, db_path = integration_setup + model_name, provider = model_config + + run_cli(duckdb_cli_path, db_path, "SELECT flock_reset_metrics();") + + test_model_name = f"test-mixed-metrics-model_{model_name}" + create_model_query = ( + f"CREATE MODEL('{test_model_name}', '{model_name}', '{provider}');" + ) + run_cli(duckdb_cli_path, db_path, create_model_query) + + query = ( + """ + SELECT + llm_complete( + {'model_name': '""" + + test_model_name + + """'}, + {'prompt': 'Hello'} + ) AS scalar_result, + (SELECT llm_reduce( + {'model_name': '""" + + test_model_name + + """'}, + {'prompt': 'Summarize', 'context_columns': [{'data': description}]} + ) FROM VALUES ('Test description') AS t(description)) AS aggregate_result, + flock_get_metrics() AS metrics; + """ + ) + result = run_cli(duckdb_cli_path, db_path, query) + + assert result.returncode == 0 + + reader = csv.DictReader(StringIO(result.stdout)) + row = next(reader, None) + assert row is not None and "metrics" in row + + metrics = json.loads(row["metrics"]) + + # Should have both scalar and aggregate metrics + has_scalar = any(key.startswith("llm_complete_") for key in metrics.keys()) + has_aggregate = any(key.startswith("llm_reduce_") for key in metrics.keys()) + + assert has_scalar, "Scalar function metrics not found" + assert has_aggregate, "Aggregate function metrics not found" From 4de6e21f93a8dd35d48b8e6260e7a0029a262ee0 Mon Sep 17 00:00:00 2001 From: Anas Dorbani Date: Sat, 6 Dec 2025 15:56:06 -0500 
Subject: [PATCH 12/59] Fixed code formatting --- src/include/filesystem.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/include/filesystem.hpp b/src/include/filesystem.hpp index 5e2bde20..bb8f2c49 100644 --- a/src/include/filesystem.hpp +++ b/src/include/filesystem.hpp @@ -41,7 +41,7 @@ #endif // Not on Visual Studio. Let's use the normal version -#else // #ifdef _MSC_VER +#else// #ifdef _MSC_VER #define INCLUDE_STD_FILESYSTEM_EXPERIMENTAL 0 #endif @@ -70,4 +70,4 @@ namespace filesystem = experimental::filesystem; #include #endif -#endif // #ifndef INCLUDE_STD_FILESYSTEM_EXPERIMENTAL +#endif// #ifndef INCLUDE_STD_FILESYSTEM_EXPERIMENTAL From 03f58a4d1a8a4387cfc46fc4614462611916ecf8 Mon Sep 17 00:00:00 2001 From: Anas Dorbani Date: Sat, 6 Dec 2025 15:59:13 -0500 Subject: [PATCH 13/59] Fixed include in llm_complete --- src/functions/scalar/llm_complete/implementation.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/functions/scalar/llm_complete/implementation.cpp b/src/functions/scalar/llm_complete/implementation.cpp index 1c354a32..37839cff 100644 --- a/src/functions/scalar/llm_complete/implementation.cpp +++ b/src/functions/scalar/llm_complete/implementation.cpp @@ -1,5 +1,5 @@ #include "flock/functions/scalar/llm_complete.hpp" -#include "flock/metrics/metrics.hpp" +#include "flock/metrics/manager.hpp" #include From d86c4c28f520e905ed66fa7722c2d683269a6c3a Mon Sep 17 00:00:00 2001 From: Anas Dorbani Date: Sat, 6 Dec 2025 16:00:04 -0500 Subject: [PATCH 14/59] Replaced old FlockMetrics API call --- src/functions/scalar/llm_complete/implementation.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/functions/scalar/llm_complete/implementation.cpp b/src/functions/scalar/llm_complete/implementation.cpp index 37839cff..1f96338c 100644 --- a/src/functions/scalar/llm_complete/implementation.cpp +++ b/src/functions/scalar/llm_complete/implementation.cpp @@ -84,7 +84,7 @@ void 
LlmComplete::Execute(duckdb::DataChunk& args, duckdb::ExpressionState& stat // End execution timing and update metrics auto exec_end = std::chrono::high_resolution_clock::now(); double exec_duration_ms = std::chrono::duration(exec_end - exec_start).count(); - FlockMetrics::GetInstance().AddExecutionTime(exec_duration_ms); + MetricsManager::AddExecutionTime(exec_duration_ms); } }// namespace flock From 5b7d511ab714089ed761425802f05cd7614170e0 Mon Sep 17 00:00:00 2001 From: Anas Dorbani Date: Sat, 6 Dec 2025 17:44:29 -0500 Subject: [PATCH 15/59] Add missing metrics tracking to llm_complete function --- .../scalar/llm_complete/implementation.cpp | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/functions/scalar/llm_complete/implementation.cpp b/src/functions/scalar/llm_complete/implementation.cpp index 1f96338c..f4da88ea 100644 --- a/src/functions/scalar/llm_complete/implementation.cpp +++ b/src/functions/scalar/llm_complete/implementation.cpp @@ -28,6 +28,11 @@ std::vector LlmComplete::Operation(duckdb::DataChunk& args) { // LlmComplete::ValidateArguments(args); auto model_details_json = CastVectorOfStructsToJson(args.data[0], 1); Model model(model_details_json); + + // Set model name and provider in metrics (context is already set in Execute) + auto model_details = model.GetModelDetails(); + MetricsManager::SetModelInfo(model_details.model_name, model_details.provider_name); + auto prompt_context_json = CastVectorOfStructsToJson(args.data[1], args.size()); auto context_columns = nlohmann::json::array(); if (prompt_context_json.contains("context_columns")) { @@ -66,6 +71,14 @@ std::vector LlmComplete::Operation(duckdb::DataChunk& args) { } void LlmComplete::Execute(duckdb::DataChunk& args, duckdb::ExpressionState& state, duckdb::Vector& result) { + // Get database instance and state ID for metrics + auto& context = state.GetContext(); + auto* db = context.db.get(); + const void* state_id = static_cast(&state); + + // Start metrics tracking + 
MetricsManager::StartInvocation(db, state_id, FunctionType::LLM_COMPLETE); + // Start execution timing auto exec_start = std::chrono::high_resolution_clock::now(); From 12675b2feb1d204fdcbb5d55241232597d2a11f9 Mon Sep 17 00:00:00 2001 From: Anas Dorbani Date: Sat, 6 Dec 2025 18:02:17 -0500 Subject: [PATCH 16/59] Update test prompts to ensure 1-2 word responses for faster, more predictable tests --- .../integration/tests/metrics/test_metrics.py | 32 +++++++++---------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/test/integration/src/integration/tests/metrics/test_metrics.py b/test/integration/src/integration/tests/metrics/test_metrics.py index 98113070..c6916b1d 100644 --- a/test/integration/src/integration/tests/metrics/test_metrics.py +++ b/test/integration/src/integration/tests/metrics/test_metrics.py @@ -73,7 +73,7 @@ def test_metrics_after_llm_complete(integration_setup, model_config): {'model_name': '""" + test_model_name + """'}, - {'prompt': 'What is 2+2?'} + {'prompt': 'Answer with one number: What is 2+2?'} ) AS result, flock_get_metrics() AS metrics; """ @@ -131,7 +131,7 @@ def test_metrics_reset_clears_counters(integration_setup, model_config): {'model_name': '""" + test_model_name + """'}, - {'prompt': 'Say hello'} + {'prompt': 'Say one word: hello'} ) AS result, flock_get_metrics() AS metrics; """ @@ -186,19 +186,19 @@ def test_sequential_numbering_multiple_calls(integration_setup, model_config): {'model_name': '""" + test_model_name + """'}, - {'prompt': 'First call'} + {'prompt': 'Say: one'} ) AS result1, llm_complete( {'model_name': '""" + test_model_name + """'}, - {'prompt': 'Second call'} + {'prompt': 'Say: two'} ) AS result2, llm_complete( {'model_name': '""" + test_model_name + """'}, - {'prompt': 'Third call'} + {'prompt': 'Say: three'} ) AS result3, flock_get_metrics() AS metrics; """ @@ -263,7 +263,7 @@ def test_flock_get_debug_metrics_returns_nested_structure( {'model_name': '""" + test_model_name + """'}, - {'prompt': 
'What is 2+2?'} + {'prompt': 'Answer with one number: What is 2+2?'} ) AS result, flock_get_debug_metrics() AS debug_metrics; """ @@ -325,13 +325,13 @@ def test_debug_metrics_registration_order(integration_setup, model_config): {'model_name': '""" + test_model_name + """'}, - {'prompt': 'First'} + {'prompt': 'Say: one'} ) AS result1, llm_complete( {'model_name': '""" + test_model_name + """'}, - {'prompt': 'Second'} + {'prompt': 'Say: two'} ) AS result2, flock_get_debug_metrics() AS debug_metrics; """ @@ -389,7 +389,7 @@ def test_aggregate_function_metrics_tracking(integration_setup, model_config): {'model_name': '""" + test_model_name + """'}, - {'prompt': 'Summarize', 'context_columns': [{'data': description}]} + {'prompt': 'One word summary:', 'context_columns': [{'data': description}]} ) AS summary, flock_get_metrics() AS metrics FROM VALUES @@ -460,13 +460,13 @@ def test_multiple_aggregate_functions_sequential_numbering( {'model_name': '""" + test_model_name + """'}, - {'prompt': 'First prompt', 'context_columns': [{'data': description}]} + {'prompt': 'One word 1:', 'context_columns': [{'data': description}]} ) AS summary1, llm_reduce( {'model_name': '""" + test_model_name + """'}, - {'prompt': 'Second prompt', 'context_columns': [{'data': description}]} + {'prompt': 'One word 2:', 'context_columns': [{'data': description}]} ) AS summary2, flock_get_metrics() AS metrics FROM VALUES @@ -524,7 +524,7 @@ def test_aggregate_function_debug_metrics(integration_setup, model_config): {'model_name': '""" + test_model_name + """'}, - {'prompt': 'Summarize', 'context_columns': [{'data': description}]} + {'prompt': 'One word summary:', 'context_columns': [{'data': description}]} ) AS summary, flock_get_debug_metrics() AS debug_metrics FROM VALUES @@ -583,7 +583,7 @@ def test_llm_rerank_metrics(integration_setup, model_config): {'model_name': '""" + test_model_name + """'}, - {'prompt': 'Rank these', 'context_columns': [{'data': description}]} + {'prompt': 'One word 
rank:', 'context_columns': [{'data': description}]} ) AS ranked, flock_get_metrics() AS metrics FROM VALUES @@ -638,7 +638,7 @@ def test_llm_first_metrics(integration_setup, model_config): {'model_name': '""" + test_model_name + """'}, - {'prompt': 'Select first', 'context_columns': [{'data': description}]} + {'prompt': 'One word:', 'context_columns': [{'data': description}]} ) AS first_item, flock_get_metrics() AS metrics FROM VALUES @@ -695,13 +695,13 @@ def test_mixed_scalar_and_aggregate_metrics(integration_setup, model_config): {'model_name': '""" + test_model_name + """'}, - {'prompt': 'Hello'} + {'prompt': 'Say: hi'} ) AS scalar_result, (SELECT llm_reduce( {'model_name': '""" + test_model_name + """'}, - {'prompt': 'Summarize', 'context_columns': [{'data': description}]} + {'prompt': 'One word summary:', 'context_columns': [{'data': description}]} ) FROM VALUES ('Test description') AS t(description)) AS aggregate_result, flock_get_metrics() AS metrics; """ From c66bb3a0cbedb1cfaecd3b99d8b81c8c5dfc0d33 Mon Sep 17 00:00:00 2001 From: Anas Dorbani Date: Sat, 6 Dec 2025 18:48:01 -0500 Subject: [PATCH 17/59] Remove legacy MetricsContext class (replaced by MetricsManager) --- src/include/flock/metrics/context.hpp | 45 --------------------------- 1 file changed, 45 deletions(-) delete mode 100644 src/include/flock/metrics/context.hpp diff --git a/src/include/flock/metrics/context.hpp b/src/include/flock/metrics/context.hpp deleted file mode 100644 index 3e8487da..00000000 --- a/src/include/flock/metrics/context.hpp +++ /dev/null @@ -1,45 +0,0 @@ -#pragma once - -#include "duckdb/main/database.hpp" -#include "flock/metrics/types.hpp" - -namespace flock { - -// Thread-local storage for metrics context (legacy, not used in function code) -class MetricsContext { -public: - static void SetWithDatabase(duckdb::DatabaseInstance* db, const void* state_id, FunctionType type) noexcept { - current_database_ = db; - current_state_id_ = state_id; - current_function_ = type; - } 
- - static void Clear() noexcept { - current_database_ = nullptr; - current_state_id_ = nullptr; - current_function_ = FunctionType::UNKNOWN; - } - - static duckdb::DatabaseInstance* GetDatabase() noexcept { - return current_database_; - } - - static const void* GetStateId() noexcept { - return current_state_id_; - } - - static FunctionType GetFunctionType() noexcept { - return current_function_; - } - - static bool IsActive() noexcept { - return current_database_ != nullptr && current_state_id_ != nullptr && current_function_ != FunctionType::UNKNOWN; - } - -private: - static thread_local duckdb::DatabaseInstance* current_database_; - static thread_local const void* current_state_id_; - static thread_local FunctionType current_function_; -}; - -}// namespace flock From 7aab760c7e92304f4c113ac1e9eca9b9eb4b139f Mon Sep 17 00:00:00 2001 From: Anas Dorbani Date: Sat, 6 Dec 2025 18:56:46 -0500 Subject: [PATCH 18/59] Centralized shared standard library includes in common.hpp --- src/include/flock/core/common.hpp | 9 +++++++++ src/include/flock/custom_parser/query/model_parser.hpp | 3 --- src/include/flock/custom_parser/query/prompt_parser.hpp | 3 --- src/include/flock/custom_parser/query_parser.hpp | 3 --- src/include/flock/custom_parser/query_statements.hpp | 4 ---- src/include/flock/custom_parser/tokenizer.hpp | 2 +- src/include/flock/functions/aggregate/aggregate.hpp | 4 +--- src/include/flock/functions/scalar/scalar.hpp | 2 +- src/include/flock/metrics/base_manager.hpp | 2 -- src/include/flock/metrics/data_structures.hpp | 1 + src/include/flock/model_manager/model.hpp | 6 ++---- .../model_manager/providers/handlers/base_handler.hpp | 5 +---- .../flock/model_manager/providers/handlers/handler.hpp | 1 + .../flock/model_manager/providers/handlers/ollama.hpp | 5 ----- .../flock/model_manager/providers/handlers/openai.hpp | 4 ---- src/include/flock/model_manager/providers/provider.hpp | 3 ++- src/include/flock/model_manager/repository.hpp | 2 +- 
src/include/flock/prompt_manager/prompt_manager.hpp | 5 ++--- src/include/flock/prompt_manager/repository.hpp | 2 +- src/include/flock/secret_manager/secret_manager.hpp | 1 - 20 files changed, 23 insertions(+), 44 deletions(-) diff --git a/src/include/flock/core/common.hpp b/src/include/flock/core/common.hpp index bb909ea4..7c68ce6f 100644 --- a/src/include/flock/core/common.hpp +++ b/src/include/flock/core/common.hpp @@ -1,6 +1,15 @@ #pragma once +// DuckDB includes #include "duckdb.hpp" #include "duckdb/common/exception.hpp" #include "duckdb/function/scalar_function.hpp" #include "duckdb/parser/parsed_data/create_scalar_function_info.hpp" + +// Common standard library includes +#include +#include +#include +#include +#include +#include diff --git a/src/include/flock/custom_parser/query/model_parser.hpp b/src/include/flock/custom_parser/query/model_parser.hpp index a18fedbb..bd5a6baa 100644 --- a/src/include/flock/custom_parser/query/model_parser.hpp +++ b/src/include/flock/custom_parser/query/model_parser.hpp @@ -5,10 +5,7 @@ #include "flock/custom_parser/tokenizer.hpp" #include "fmt/format.h" -#include #include -#include -#include namespace flock { diff --git a/src/include/flock/custom_parser/query/prompt_parser.hpp b/src/include/flock/custom_parser/query/prompt_parser.hpp index b2e422ec..eeca50e0 100644 --- a/src/include/flock/custom_parser/query/prompt_parser.hpp +++ b/src/include/flock/custom_parser/query/prompt_parser.hpp @@ -5,9 +5,6 @@ #include "flock/custom_parser/tokenizer.hpp" #include "fmt/format.h" -#include -#include -#include namespace flock { diff --git a/src/include/flock/custom_parser/query_parser.hpp b/src/include/flock/custom_parser/query_parser.hpp index bbdac178..3b4b1e25 100644 --- a/src/include/flock/custom_parser/query_parser.hpp +++ b/src/include/flock/custom_parser/query_parser.hpp @@ -7,9 +7,6 @@ #include "flock/custom_parser/tokenizer.hpp" #include "fmt/format.h" -#include -#include -#include namespace flock { diff --git 
a/src/include/flock/custom_parser/query_statements.hpp b/src/include/flock/custom_parser/query_statements.hpp index 2528bf88..88446eda 100644 --- a/src/include/flock/custom_parser/query_statements.hpp +++ b/src/include/flock/custom_parser/query_statements.hpp @@ -2,10 +2,6 @@ #include "flock/core/common.hpp" -#include -#include -#include - namespace flock { // Enum to represent different statement types diff --git a/src/include/flock/custom_parser/tokenizer.hpp b/src/include/flock/custom_parser/tokenizer.hpp index 17c7b378..c2d28647 100644 --- a/src/include/flock/custom_parser/tokenizer.hpp +++ b/src/include/flock/custom_parser/tokenizer.hpp @@ -1,6 +1,6 @@ #pragma once -#include +#include "flock/core/common.hpp" namespace flock { diff --git a/src/include/flock/functions/aggregate/aggregate.hpp b/src/include/flock/functions/aggregate/aggregate.hpp index efa93436..3b341d20 100644 --- a/src/include/flock/functions/aggregate/aggregate.hpp +++ b/src/include/flock/functions/aggregate/aggregate.hpp @@ -1,12 +1,10 @@ #pragma once -#include -#include - #include "flock/core/common.hpp" #include "flock/functions/input_parser.hpp" #include "flock/metrics/manager.hpp" #include "flock/model_manager/model.hpp" +#include namespace flock { diff --git a/src/include/flock/functions/scalar/scalar.hpp b/src/include/flock/functions/scalar/scalar.hpp index bc8585c7..ebf4726e 100644 --- a/src/include/flock/functions/scalar/scalar.hpp +++ b/src/include/flock/functions/scalar/scalar.hpp @@ -1,12 +1,12 @@ #pragma once #include -#include #include "flock/core/common.hpp" #include "flock/functions/input_parser.hpp" #include "flock/model_manager/model.hpp" #include "flock/prompt_manager/prompt_manager.hpp" +#include namespace flock { diff --git a/src/include/flock/metrics/base_manager.hpp b/src/include/flock/metrics/base_manager.hpp index 7701724c..2dfb1fb5 100644 --- a/src/include/flock/metrics/base_manager.hpp +++ b/src/include/flock/metrics/base_manager.hpp @@ -4,10 +4,8 @@ #include #include 
#include -#include #include #include -#include namespace flock { diff --git a/src/include/flock/metrics/data_structures.hpp b/src/include/flock/metrics/data_structures.hpp index 4ae30316..27741e81 100644 --- a/src/include/flock/metrics/data_structures.hpp +++ b/src/include/flock/metrics/data_structures.hpp @@ -1,5 +1,6 @@ #pragma once +#include "flock/core/common.hpp" #include "flock/metrics/types.hpp" #include #include diff --git a/src/include/flock/model_manager/model.hpp b/src/include/flock/model_manager/model.hpp index bf1b2dc6..ee3085b5 100644 --- a/src/include/flock/model_manager/model.hpp +++ b/src/include/flock/model_manager/model.hpp @@ -1,19 +1,17 @@ #pragma once #include "fmt/format.h" -#include -#include -#include #include -#include #include "duckdb/main/connection.hpp" +#include "flock/core/common.hpp" #include "flock/core/config.hpp" #include "flock/model_manager/providers/adapters/azure.hpp" #include "flock/model_manager/providers/adapters/ollama.hpp" #include "flock/model_manager/providers/adapters/openai.hpp" #include "flock/model_manager/providers/handlers/ollama.hpp" #include "flock/model_manager/repository.hpp" +#include namespace flock { diff --git a/src/include/flock/model_manager/providers/handlers/base_handler.hpp b/src/include/flock/model_manager/providers/handlers/base_handler.hpp index 27b89dc3..6c21aa47 100644 --- a/src/include/flock/model_manager/providers/handlers/base_handler.hpp +++ b/src/include/flock/model_manager/providers/handlers/base_handler.hpp @@ -1,14 +1,11 @@ #pragma once +#include "flock/core/common.hpp" #include "flock/metrics/manager.hpp" #include "flock/model_manager/providers/handlers/handler.hpp" #include "session.hpp" #include -#include #include -#include -#include -#include namespace flock { diff --git a/src/include/flock/model_manager/providers/handlers/handler.hpp b/src/include/flock/model_manager/providers/handlers/handler.hpp index 8cf18d79..51fe282a 100644 --- 
a/src/include/flock/model_manager/providers/handlers/handler.hpp +++ b/src/include/flock/model_manager/providers/handlers/handler.hpp @@ -1,5 +1,6 @@ #pragma once +#include "flock/core/common.hpp" #include namespace flock { diff --git a/src/include/flock/model_manager/providers/handlers/ollama.hpp b/src/include/flock/model_manager/providers/handlers/ollama.hpp index 165abfc3..aae9a26c 100644 --- a/src/include/flock/model_manager/providers/handlers/ollama.hpp +++ b/src/include/flock/model_manager/providers/handlers/ollama.hpp @@ -4,11 +4,6 @@ #include "session.hpp" #include #include -#include -#include -#include -#include -#include namespace flock { diff --git a/src/include/flock/model_manager/providers/handlers/openai.hpp b/src/include/flock/model_manager/providers/handlers/openai.hpp index 86ee191f..44895778 100644 --- a/src/include/flock/model_manager/providers/handlers/openai.hpp +++ b/src/include/flock/model_manager/providers/handlers/openai.hpp @@ -3,10 +3,6 @@ #include "flock/model_manager/providers/handlers/base_handler.hpp" #include "session.hpp" #include -#include -#include -#include -#include namespace flock { diff --git a/src/include/flock/model_manager/providers/provider.hpp b/src/include/flock/model_manager/providers/provider.hpp index 928e75de..74c302a2 100644 --- a/src/include/flock/model_manager/providers/provider.hpp +++ b/src/include/flock/model_manager/providers/provider.hpp @@ -1,11 +1,12 @@ #pragma once #include "fmt/format.h" -#include #include +#include "flock/core/common.hpp" #include "flock/model_manager/providers/handlers/handler.hpp" #include "flock/model_manager/repository.hpp" +#include namespace flock { diff --git a/src/include/flock/model_manager/repository.hpp b/src/include/flock/model_manager/repository.hpp index 7efeb950..e4aa7717 100644 --- a/src/include/flock/model_manager/repository.hpp +++ b/src/include/flock/model_manager/repository.hpp @@ -1,9 +1,9 @@ #pragma once +#include "flock/core/common.hpp" #include #include #include 
-#include #include namespace flock { diff --git a/src/include/flock/prompt_manager/prompt_manager.hpp b/src/include/flock/prompt_manager/prompt_manager.hpp index 021991ba..9d3e203e 100644 --- a/src/include/flock/prompt_manager/prompt_manager.hpp +++ b/src/include/flock/prompt_manager/prompt_manager.hpp @@ -1,12 +1,11 @@ #pragma once #include -#include -#include -#include +#include "flock/core/common.hpp" #include "flock/core/config.hpp" #include "flock/prompt_manager/repository.hpp" +#include namespace flock { diff --git a/src/include/flock/prompt_manager/repository.hpp b/src/include/flock/prompt_manager/repository.hpp index f525e420..ba01a293 100644 --- a/src/include/flock/prompt_manager/repository.hpp +++ b/src/include/flock/prompt_manager/repository.hpp @@ -1,6 +1,6 @@ #pragma once -#include +#include "flock/core/common.hpp" #include namespace flock { diff --git a/src/include/flock/secret_manager/secret_manager.hpp b/src/include/flock/secret_manager/secret_manager.hpp index 364510a6..a852c6e9 100644 --- a/src/include/flock/secret_manager/secret_manager.hpp +++ b/src/include/flock/secret_manager/secret_manager.hpp @@ -1,7 +1,6 @@ #pragma once #include "flock/core/common.hpp" -#include namespace flock { From d0325e8c01e53db2322653b7e2a171b7e74ac953 Mon Sep 17 00:00:00 2001 From: Anas Dorbani Date: Sun, 7 Dec 2025 13:12:32 -0500 Subject: [PATCH 19/59] Add metrics merging for aggregate functions --- .../llm_first_or_last/implementation.cpp | 21 +++++- .../aggregate/llm_reduce/implementation.cpp | 29 ++++++-- .../aggregate/llm_rerank/implementation.cpp | 21 +++++- src/include/flock/metrics/manager.hpp | 8 +++ src/metrics/metrics.cpp | 72 +++++++++++++++++++ 5 files changed, 141 insertions(+), 10 deletions(-) diff --git a/src/functions/aggregate/llm_first_or_last/implementation.cpp b/src/functions/aggregate/llm_first_or_last/implementation.cpp index 98d052be..3f249037 100644 --- a/src/functions/aggregate/llm_first_or_last/implementation.cpp +++ 
b/src/functions/aggregate/llm_first_or_last/implementation.cpp @@ -3,6 +3,7 @@ #include "flock/metrics/manager.hpp" #include +#include namespace flock { @@ -84,6 +85,12 @@ void LlmFirstOrLast::FinalizeResults(duckdb::Vector& states, duckdb::AggregateIn // Map AggregateFunctionType to FunctionType FunctionType metrics_function_type = (function_type == AggregateFunctionType::FIRST) ? FunctionType::LLM_FIRST : FunctionType::LLM_LAST; + auto db = Config::db; + std::vector processed_state_ids; + std::string merged_model_name; + std::string merged_provider; + + // Process each state individually for (idx_t i = 0; i < count; i++) { auto idx = i + offset; auto* state = states_vector[idx]; @@ -93,14 +100,20 @@ void LlmFirstOrLast::FinalizeResults(duckdb::Vector& states, duckdb::AggregateIn Model model(state->model_details); auto model_details_obj = model.GetModelDetails(); - // Get database instance and state ID for metrics - auto db = Config::db; + // Get state ID for metrics const void* state_id = static_cast(state); + processed_state_ids.push_back(state_id); // Start metrics tracking MetricsManager::StartInvocation(db, state_id, metrics_function_type); MetricsManager::SetModelInfo(model_details_obj.model_name, model_details_obj.provider_name); + // Store model info for merged metrics (use first non-empty) + if (merged_model_name.empty() && !model_details_obj.model_name.empty()) { + merged_model_name = model_details_obj.model_name; + merged_provider = model_details_obj.provider_name; + } + auto exec_start = std::chrono::high_resolution_clock::now(); auto tuples_with_ids = *state->value; @@ -127,6 +140,10 @@ void LlmFirstOrLast::FinalizeResults(duckdb::Vector& states, duckdb::AggregateIn result.SetValue(idx, nullptr); } } + + // Merge all metrics from processed states into a single metrics entry + MetricsManager::MergeAggregateMetrics(db, processed_state_ids, metrics_function_type, + merged_model_name, merged_provider); } }// namespace flock diff --git 
a/src/functions/aggregate/llm_reduce/implementation.cpp b/src/functions/aggregate/llm_reduce/implementation.cpp index 2d050de8..815868a5 100644 --- a/src/functions/aggregate/llm_reduce/implementation.cpp +++ b/src/functions/aggregate/llm_reduce/implementation.cpp @@ -3,6 +3,7 @@ #include "flock/metrics/manager.hpp" #include +#include namespace flock { @@ -70,23 +71,35 @@ void LlmReduce::FinalizeResults(duckdb::Vector& states, duckdb::AggregateInputDa const AggregateFunctionType function_type) { const auto states_vector = reinterpret_cast(duckdb::FlatVector::GetData(states)); + auto db = Config::db; + std::vector processed_state_ids; + std::string merged_model_name; + std::string merged_provider; + + // Process each state individually for (idx_t i = 0; i < count; i++) { auto idx = i + offset; auto* state = states_vector[idx]; - if (state && !state->value->empty()) { - // Use model_details and user_query from the state (not static variables) + if (state && state->value && !state->value->empty()) { + // Use model_details and user_query from the state Model model(state->model_details); auto model_details_obj = model.GetModelDetails(); - // Get database instance and state ID for metrics - auto db = Config::db; + // Get state ID for metrics const void* state_id = static_cast(state); + processed_state_ids.push_back(state_id); - // Start metrics tracking + // Start metrics tracking for this state MetricsManager::StartInvocation(db, state_id, FunctionType::LLM_REDUCE); MetricsManager::SetModelInfo(model_details_obj.model_name, model_details_obj.provider_name); + // Store model info for merged metrics (use first non-empty) + if (merged_model_name.empty() && !model_details_obj.model_name.empty()) { + merged_model_name = model_details_obj.model_name; + merged_provider = model_details_obj.provider_name; + } + auto exec_start = std::chrono::high_resolution_clock::now(); LlmReduce reduce_instance; @@ -104,9 +117,13 @@ void LlmReduce::FinalizeResults(duckdb::Vector& states, 
duckdb::AggregateInputDa result.SetValue(idx, response.dump()); } } else { - result.SetValue(idx, nullptr);// Empty result for null/empty states + result.SetValue(idx, nullptr); } } + + // Merge all metrics from processed states into a single metrics entry + MetricsManager::MergeAggregateMetrics(db, processed_state_ids, FunctionType::LLM_REDUCE, + merged_model_name, merged_provider); } }// namespace flock diff --git a/src/functions/aggregate/llm_rerank/implementation.cpp b/src/functions/aggregate/llm_rerank/implementation.cpp index fc261eed..e6a756e2 100644 --- a/src/functions/aggregate/llm_rerank/implementation.cpp +++ b/src/functions/aggregate/llm_rerank/implementation.cpp @@ -3,6 +3,7 @@ #include "flock/metrics/manager.hpp" #include +#include namespace flock { @@ -120,6 +121,12 @@ void LlmRerank::Finalize(duckdb::Vector& states, duckdb::AggregateInputData& agg idx_t count, idx_t offset) { const auto states_vector = reinterpret_cast(duckdb::FlatVector::GetData(states)); + auto db = Config::db; + std::vector processed_state_ids; + std::string merged_model_name; + std::string merged_provider; + + // Process each state individually for (idx_t i = 0; i < count; i++) { auto idx = i + offset; auto* state = states_vector[idx]; @@ -129,14 +136,20 @@ void LlmRerank::Finalize(duckdb::Vector& states, duckdb::AggregateInputData& agg Model model(state->model_details); auto model_details_obj = model.GetModelDetails(); - // Get database instance and state ID for metrics - auto db = Config::db; + // Get state ID for metrics const void* state_id = static_cast(state); + processed_state_ids.push_back(state_id); // Start metrics tracking MetricsManager::StartInvocation(db, state_id, FunctionType::LLM_RERANK); MetricsManager::SetModelInfo(model_details_obj.model_name, model_details_obj.provider_name); + // Store model info for merged metrics (use first non-empty) + if (merged_model_name.empty() && !model_details_obj.model_name.empty()) { + merged_model_name = 
model_details_obj.model_name; + merged_provider = model_details_obj.provider_name; + } + auto exec_start = std::chrono::high_resolution_clock::now(); auto tuples_with_ids = nlohmann::json::array(); @@ -157,6 +170,10 @@ void LlmRerank::Finalize(duckdb::Vector& states, duckdb::AggregateInputData& agg result.SetValue(idx, nullptr); } } + + // Merge all metrics from processed states into a single metrics entry + MetricsManager::MergeAggregateMetrics(db, processed_state_ids, FunctionType::LLM_RERANK, + merged_model_name, merged_provider); } }// namespace flock diff --git a/src/include/flock/metrics/manager.hpp b/src/include/flock/metrics/manager.hpp index 6c716f2a..629c5ff7 100644 --- a/src/include/flock/metrics/manager.hpp +++ b/src/include/flock/metrics/manager.hpp @@ -92,6 +92,14 @@ class MetricsManager : public BaseMetricsManager { current_function_type_ = FunctionType::UNKNOWN; } + // Merge metrics from multiple states into a single state + // This is used by aggregate functions to consolidate metrics from all processed states + static void MergeAggregateMetrics(duckdb::DatabaseInstance* db, + const std::vector& processed_state_ids, + FunctionType function_type, + const std::string& model_name = "", + const std::string& provider = ""); + // SQL function implementations static void ExecuteGetMetrics(duckdb::DataChunk& args, duckdb::ExpressionState& state, duckdb::Vector& result); static void ExecuteGetDebugMetrics(duckdb::DataChunk& args, duckdb::ExpressionState& state, duckdb::Vector& result); diff --git a/src/metrics/metrics.cpp b/src/metrics/metrics.cpp index 7ec2f174..85eeff90 100644 --- a/src/metrics/metrics.cpp +++ b/src/metrics/metrics.cpp @@ -1,4 +1,6 @@ +#include "flock/metrics/data_structures.hpp" #include "flock/metrics/manager.hpp" +#include namespace flock { @@ -47,4 +49,74 @@ void MetricsManager::ExecuteResetMetrics(duckdb::DataChunk& args, duckdb::Expres result_data[0] = duckdb::StringVector::AddString(result, "Metrics reset successfully"); } +void 
MetricsManager::MergeAggregateMetrics(duckdb::DatabaseInstance* db, + const std::vector& processed_state_ids, + FunctionType function_type, + const std::string& model_name, + const std::string& provider) { + if (processed_state_ids.empty() || db == nullptr) { + return; + } + + auto& manager = GetForDatabase(db); + + // Use the first state_id as the merged state_id + const void* merged_state_id = processed_state_ids[0]; + + // Start a new invocation for the merged metrics (registers the state and sets registration order) + StartInvocation(db, merged_state_id, function_type); + + // Get and merge metrics from all processed states + int64_t total_input_tokens = 0; + int64_t total_output_tokens = 0; + int64_t total_api_calls = 0; + int64_t total_api_duration_us = 0; + int64_t total_execution_time_us = 0; + std::string final_model_name = model_name; + std::string final_provider = provider; + + for (const void* state_id: processed_state_ids) { + auto& thread_metrics = manager.GetThreadMetrics(state_id); + const auto& metrics = thread_metrics.GetMetrics(function_type); + + if (!metrics.IsEmpty()) { + total_input_tokens += metrics.input_tokens; + total_output_tokens += metrics.output_tokens; + total_api_calls += metrics.api_calls; + total_api_duration_us += metrics.api_duration_us; + total_execution_time_us += metrics.execution_time_us; + + // Use model info from first non-empty state if not provided + if (final_model_name.empty() && !metrics.model_name.empty()) { + final_model_name = metrics.model_name; + final_provider = metrics.provider; + } + } + } + + // Get the merged state's metrics and set aggregated values + auto& merged_thread_metrics = manager.GetThreadMetrics(merged_state_id); + auto& merged_metrics = merged_thread_metrics.GetMetrics(function_type); + + // Set the aggregated values directly + merged_metrics.input_tokens = total_input_tokens; + merged_metrics.output_tokens = total_output_tokens; + merged_metrics.api_calls = total_api_calls; + 
merged_metrics.api_duration_us = total_api_duration_us; + merged_metrics.execution_time_us = total_execution_time_us; + if (!final_model_name.empty()) { + merged_metrics.model_name = final_model_name; + merged_metrics.provider = final_provider; + } + + // Clean up individual state metrics (reset function_type metrics for all except the merged one) + for (size_t i = 1; i < processed_state_ids.size(); i++) { + const void* state_id = processed_state_ids[i]; + auto& thread_metrics = manager.GetThreadMetrics(state_id); + auto& metrics = thread_metrics.GetMetrics(function_type); + // Reset only the specific function_type metrics for this state + metrics = FunctionMetricsData{}; + } +} + }// namespace flock From 4ad6d1cac93288e4c29c935fe20d2ad261820158 Mon Sep 17 00:00:00 2001 From: Anas Dorbani Date: Sun, 7 Dec 2025 13:12:38 -0500 Subject: [PATCH 20/59] Add tests for metrics merging --- .../integration/tests/metrics/test_metrics.py | 130 ++++++++++++++++++ test/unit/functions/scalar/metrics_test.cpp | 66 +++++++++ 2 files changed, 196 insertions(+) diff --git a/test/integration/src/integration/tests/metrics/test_metrics.py b/test/integration/src/integration/tests/metrics/test_metrics.py index c6916b1d..8513a225 100644 --- a/test/integration/src/integration/tests/metrics/test_metrics.py +++ b/test/integration/src/integration/tests/metrics/test_metrics.py @@ -436,6 +436,136 @@ def test_aggregate_function_metrics_tracking(integration_setup, model_config): assert found_reduce, f"llm_reduce metrics not found in: {list(metrics.keys())}" +def test_aggregate_function_metrics_merging_with_group_by( + integration_setup, model_config +): + """Test that metrics from multiple states in a single aggregate call are merged into one entry""" + duckdb_cli_path, db_path = integration_setup + model_name, provider = model_config + + run_cli(duckdb_cli_path, db_path, "SELECT flock_reset_metrics();") + + test_model_name = f"test-merge-metrics-model_{model_name}" + create_model_query = ( + 
f"CREATE MODEL('{test_model_name}', '{model_name}', '{provider}');" + ) + run_cli(duckdb_cli_path, db_path, create_model_query) + + # Call llm_reduce with GROUP BY that will process multiple states + # This should result in multiple states being processed, but only ONE merged metrics entry + query = ( + """ + SELECT + category, + llm_reduce( + {'model_name': '""" + + test_model_name + + """'}, + {'prompt': 'One word summary:', 'context_columns': [{'data': description}]} + ) AS summary, + flock_get_metrics() AS metrics + FROM VALUES + ('Electronics', 'High-performance laptop'), + ('Electronics', 'Latest smartphone'), + ('Electronics', 'Gaming console') + AS t(category, description) + GROUP BY category; + """ + ) + result = run_cli(duckdb_cli_path, db_path, query) + + assert result.returncode == 0, f"Query failed with error: {result.stderr}" + + # Parse CSV output + reader = csv.DictReader(StringIO(result.stdout)) + row = next(reader, None) + assert row is not None, "No data returned from query" + assert "metrics" in row, "Metrics column not found" + + metrics = json.loads(row["metrics"]) + + # Check that metrics were recorded + assert isinstance(metrics, dict) + assert len(metrics) > 0 + + # Check for llm_reduce metrics - should have ONLY ONE entry (merged) + found_reduce_keys = [key for key in metrics.keys() if key.startswith("llm_reduce_")] + assert len(found_reduce_keys) == 1, ( + f"Expected exactly 1 llm_reduce metrics entry (merged), got {len(found_reduce_keys)}: {found_reduce_keys}" + ) + + # Verify the merged metrics have the expected structure + reduce_metrics = metrics[found_reduce_keys[0]] + assert "api_calls" in reduce_metrics + assert "input_tokens" in reduce_metrics + assert "output_tokens" in reduce_metrics + assert "total_tokens" in reduce_metrics + assert "api_duration_ms" in reduce_metrics + assert "execution_time_ms" in reduce_metrics + assert "model_name" in reduce_metrics + assert reduce_metrics["model_name"] == test_model_name + assert 
"provider" in reduce_metrics + assert reduce_metrics["provider"] == provider + + +def test_aggregate_function_metrics_merging_multiple_groups( + integration_setup, model_config +): + """Test that each GROUP BY group produces one merged metrics entry""" + duckdb_cli_path, db_path = integration_setup + model_name, provider = model_config + + run_cli(duckdb_cli_path, db_path, "SELECT flock_reset_metrics();") + + test_model_name = f"test-merge-groups-model_{model_name}" + create_model_query = ( + f"CREATE MODEL('{test_model_name}', '{model_name}', '{provider}');" + ) + run_cli(duckdb_cli_path, db_path, create_model_query) + + # Call llm_reduce with multiple GROUP BY groups + # Each group should produce ONE merged metrics entry + query = ( + """ + SELECT + category, + llm_reduce( + {'model_name': '""" + + test_model_name + + """'}, + {'prompt': 'One word summary:', 'context_columns': [{'data': description}]} + ) AS summary, + flock_get_metrics() AS metrics + FROM VALUES + ('Electronics', 'High-performance laptop'), + ('Electronics', 'Latest smartphone'), + ('Clothing', 'Comfortable jacket'), + ('Clothing', 'Perfect fit jeans') + AS t(category, description) + GROUP BY category; + """ + ) + result = run_cli(duckdb_cli_path, db_path, query) + + assert result.returncode == 0, f"Query failed with error: {result.stderr}" + + # Parse CSV output - should have 2 rows (one per category) + reader = csv.DictReader(StringIO(result.stdout)) + rows = list(reader) + assert len(rows) == 2, f"Expected 2 rows (one per category), got {len(rows)}" + + # Check metrics from the last row (should have both groups merged) + metrics = json.loads(rows[-1]["metrics"]) + + # Should have exactly ONE llm_reduce entry (the last group's merged metrics) + # Note: In a GROUP BY query, each group processes independently, so we expect one entry per group + # But since we're checking the last row, we should see at least one merged entry + found_reduce_keys = [key for key in metrics.keys() if 
key.startswith("llm_reduce_")] + assert len(found_reduce_keys) >= 1, ( + f"Expected at least 1 llm_reduce metrics entry, got {len(found_reduce_keys)}: {found_reduce_keys}" + ) + + def test_multiple_aggregate_functions_sequential_numbering( integration_setup, model_config ): diff --git a/test/unit/functions/scalar/metrics_test.cpp b/test/unit/functions/scalar/metrics_test.cpp index a66e5637..1e233c2e 100644 --- a/test/unit/functions/scalar/metrics_test.cpp +++ b/test/unit/functions/scalar/metrics_test.cpp @@ -389,6 +389,72 @@ TEST_F(MetricsTest, MultipleAggregateFunctionsSequentialNumbering) { EXPECT_TRUE(found_2) << "llm_reduce_2 not found"; } +TEST_F(MetricsTest, AggregateFunctionMetricsMerging) { + auto* db = GetDatabase(); + const void* state_id1 = reinterpret_cast(0xAAAA); + const void* state_id2 = reinterpret_cast(0xBBBB); + const void* state_id3 = reinterpret_cast(0xCCCC); + + // Simulate multiple states being processed in a single aggregate call + // Each state tracks its own metrics + MetricsManager::StartInvocation(db, state_id1, FunctionType::LLM_REDUCE); + MetricsManager::SetModelInfo("gpt-4o", "openai"); + MetricsManager::UpdateTokens(100, 50); + MetricsManager::IncrementApiCalls(); + MetricsManager::AddApiDuration(100.0); + MetricsManager::AddExecutionTime(150.0); + + MetricsManager::StartInvocation(db, state_id2, FunctionType::LLM_REDUCE); + MetricsManager::SetModelInfo("gpt-4o", "openai"); + MetricsManager::UpdateTokens(200, 100); + MetricsManager::IncrementApiCalls(); + MetricsManager::AddApiDuration(200.0); + MetricsManager::AddExecutionTime(250.0); + + MetricsManager::StartInvocation(db, state_id3, FunctionType::LLM_REDUCE); + MetricsManager::SetModelInfo("gpt-4o", "openai"); + MetricsManager::UpdateTokens(150, 75); + MetricsManager::IncrementApiCalls(); + MetricsManager::AddApiDuration(150.0); + MetricsManager::AddExecutionTime(200.0); + + // Now merge all metrics into the first state + std::vector processed_state_ids = {state_id1, state_id2, 
state_id3}; + MetricsManager::MergeAggregateMetrics(db, processed_state_ids, FunctionType::LLM_REDUCE, "gpt-4o", "openai"); + + auto& manager = GetMetricsManager(); + auto metrics = manager.GetMetrics(); + + // Should have exactly ONE llm_reduce entry (merged) + int reduce_count = 0; + int64_t total_input_tokens = 0; + int64_t total_output_tokens = 0; + int64_t total_api_calls = 0; + double total_api_duration = 0.0; + double total_execution_time = 0.0; + + for (const auto& [key, value]: metrics.items()) { + if (key.find("llm_reduce_") == 0) { + reduce_count++; + total_input_tokens += value["input_tokens"].get(); + total_output_tokens += value["output_tokens"].get(); + total_api_calls += value["api_calls"].get(); + total_api_duration += value["api_duration_ms"].get(); + total_execution_time += value["execution_time_ms"].get(); + } + } + + // Should have exactly one merged entry + EXPECT_EQ(reduce_count, 1) << "Expected exactly 1 merged llm_reduce metrics entry"; + + // Verify merged values are the sum of all states + EXPECT_EQ(total_input_tokens, 450) << "Merged input tokens should be sum of all states (100+200+150)"; + EXPECT_EQ(total_output_tokens, 225) << "Merged output tokens should be sum of all states (50+100+75)"; + EXPECT_EQ(total_api_calls, 3) << "Merged API calls should be sum of all states (1+1+1)"; + EXPECT_NEAR(total_api_duration, 450.0, 0.01) << "Merged API duration should be sum of all states (100+200+150)"; + EXPECT_NEAR(total_execution_time, 600.0, 0.01) << "Merged execution time should be sum of all states (150+250+200)"; +} + TEST_F(MetricsTest, AggregateFunctionDebugMetrics) { auto* db = GetDatabase(); const void* state_id = reinterpret_cast(0xDDDD); From ec3824a530c5078198ed20a2dab2ecbcb93fa6e6 Mon Sep 17 00:00:00 2001 From: Anas Dorbani Date: Wed, 10 Dec 2025 11:38:00 -0500 Subject: [PATCH 21/59] Added URLHandler class for file download and validation utilities --- .../providers/handlers/url_handler.hpp | 131 ++++++++++++++++++ 1 file changed, 
131 insertions(+) create mode 100644 src/include/flock/model_manager/providers/handlers/url_handler.hpp diff --git a/src/include/flock/model_manager/providers/handlers/url_handler.hpp b/src/include/flock/model_manager/providers/handlers/url_handler.hpp new file mode 100644 index 00000000..33965f62 --- /dev/null +++ b/src/include/flock/model_manager/providers/handlers/url_handler.hpp @@ -0,0 +1,131 @@ +#pragma once + +#include "flock/core/common.hpp" +#include +#include +#include +#include +#include + +namespace flock { + +class URLHandler { +public: + // Extract file extension from URL + static std::string ExtractFileExtension(const std::string& url) { + size_t last_dot = url.find_last_of('.'); + size_t last_slash = url.find_last_of('/'); + if (last_dot != std::string::npos && (last_slash == std::string::npos || last_dot > last_slash)) { + size_t query_pos = url.find_first_of('?', last_dot); + if (query_pos != std::string::npos) { + return url.substr(last_dot, query_pos - last_dot); + } else { + return url.substr(last_dot); + } + } + return "";// No extension found + } + + // Generate a unique temporary filename with extension + static std::string GenerateTempFilename(const std::string& extension) { + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_int_distribution<> dis(0, 15); + std::ostringstream oss; + oss << "/tmp/flock_"; + for (int i = 0; i < 16; ++i) { + oss << std::hex << dis(gen); + } + oss << extension; + return oss.str(); + } + + // Check if the given path is a URL + static bool IsUrl(const std::string& path) { + return path.find("http://") == 0 || path.find("https://") == 0; + } + + // Validate file exists and is not empty + static bool ValidateFile(const std::string& file_path) { + FILE* f = fopen(file_path.c_str(), "rb"); + if (!f) { + return false; + } + fseek(f, 0, SEEK_END); + long file_size = ftell(f); + fclose(f); + return file_size > 0; + } + + // Download file from URL to temporary location + static std::string 
DownloadFileToTemp(const std::string& url) { + std::string extension = ExtractFileExtension(url); + // If the URL has no extension, the temp file is created without one (content-type is not consulted) + std::string temp_filename = GenerateTempFilename(extension); + + // Download file using curl + CURL* curl = curl_easy_init(); + if (!curl) { + return ""; + } + + FILE* file = fopen(temp_filename.c_str(), "wb"); + if (!file) { + curl_easy_cleanup(curl); + return ""; + } + + curl_easy_setopt(curl, CURLOPT_URL, url.c_str()); + curl_easy_setopt( + curl, CURLOPT_WRITEFUNCTION, +[](void* ptr, size_t size, size_t nmemb, void* stream) -> size_t { return fwrite(ptr, size, nmemb, static_cast(stream)); }); + curl_easy_setopt(curl, CURLOPT_WRITEDATA, file); + curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L); + + CURLcode res = curl_easy_perform(curl); + fclose(file); + long response_code; + curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &response_code); + curl_easy_cleanup(curl); + + if (res != CURLE_OK || response_code != 200) { + std::remove(temp_filename.c_str()); + return ""; + } + + return temp_filename; + } + + // Helper struct to return file path and temp file flag + struct FilePathResult { + std::string file_path; + bool is_temp_file; + }; + + // Resolve file path: download if URL, validate, and return result + // Throws std::runtime_error if download or validation fails + static FilePathResult ResolveFilePath(const std::string& file_path_or_url) { + FilePathResult result; + + if (IsUrl(file_path_or_url)) { + result.file_path = DownloadFileToTemp(file_path_or_url); + if (result.file_path.empty()) { + throw std::runtime_error("Failed to download file: " + file_path_or_url); + } + result.is_temp_file = true; + } else { + result.file_path = file_path_or_url; + result.is_temp_file = false; + } + + if (!ValidateFile(result.file_path)) { + if (result.is_temp_file) { + std::remove(result.file_path.c_str()); + } + throw std::runtime_error("Invalid file: " + file_path_or_url); + } + + return 
result; + } +}; + +}// namespace flock From 50d3189db940de83cfd679ba35a9b9bad16d06db Mon Sep 17 00:00:00 2001 From: Anas Dorbani Date: Wed, 10 Dec 2025 11:38:15 -0500 Subject: [PATCH 22/59] Refactored ExecuteBatch to use RequestType enum for unified request handling --- .../providers/handlers/base_handler.hpp | 185 +++++++++++++++--- .../providers/handlers/handler.hpp | 7 +- 2 files changed, 159 insertions(+), 33 deletions(-) diff --git a/src/include/flock/model_manager/providers/handlers/base_handler.hpp b/src/include/flock/model_manager/providers/handlers/base_handler.hpp index 6c21aa47..2aebb11c 100644 --- a/src/include/flock/model_manager/providers/handlers/base_handler.hpp +++ b/src/include/flock/model_manager/providers/handlers/base_handler.hpp @@ -5,7 +5,12 @@ #include "flock/model_manager/providers/handlers/handler.hpp" #include "session.hpp" #include +#include +#include +#include #include +#include +#include namespace flock { @@ -15,50 +20,143 @@ class BaseModelProviderHandler : public IModelProviderHandler { : _throw_exception(throw_exception) {} virtual ~BaseModelProviderHandler() = default; - void AddRequest(const nlohmann::json& json, RequestType type = RequestType::Completion) { + void AddRequest(const nlohmann::json& json, RequestType type = RequestType::Completion) override { _request_batch.push_back(json); + _request_types.push_back(type); } std::vector CollectCompletions(const std::string& contentType = "application/json") { std::vector completions; - if (!_request_batch.empty()) completions = ExecuteBatch(_request_batch, true, contentType, true); + if (!_request_batch.empty()) completions = ExecuteBatch(_request_batch, true, contentType, RequestType::Completion); _request_batch.clear(); return completions; } std::vector CollectEmbeddings(const std::string& contentType = "application/json") { std::vector embeddings; - if (!_request_batch.empty()) embeddings = ExecuteBatch(_request_batch, true, contentType, false); + if (!_request_batch.empty()) 
embeddings = ExecuteBatch(_request_batch, true, contentType, RequestType::Embedding); _request_batch.clear(); return embeddings; } - std::vector ExecuteBatch(const std::vector& jsons, bool async = true, const std::string& contentType = "application/json", bool is_completion = true) { + + std::vector CollectTranscriptions(const std::string& contentType = "multipart/form-data") override { + std::vector transcriptions; + if (!_request_batch.empty()) { + std::vector transcription_batch; + for (size_t i = 0; i < _request_batch.size(); ++i) { + if (_request_types[i] == RequestType::Transcription) { + transcription_batch.push_back(_request_batch[i]); + } + } + + if (!transcription_batch.empty()) { + transcriptions = ExecuteBatch(transcription_batch, true, contentType, RequestType::Transcription); + // Remove transcription requests from batch + for (size_t i = _request_batch.size(); i > 0; --i) { + if (_request_types[i - 1] == RequestType::Transcription) { + _request_batch.erase(_request_batch.begin() + i - 1); + _request_types.erase(_request_types.begin() + i - 1); + } + } + } + } + return transcriptions; + } + + +public: +protected: + std::vector ExecuteBatch(const std::vector& jsons, bool async = true, const std::string& contentType = "application/json", RequestType request_type = RequestType::Completion) { struct CurlRequestData { std::string response; CURL* easy = nullptr; std::string payload; + curl_mime* mime_form = nullptr; + std::string temp_file_path; + bool is_temp_file; }; std::vector requests(jsons.size()); CURLM* multi_handle = curl_multi_init(); - struct curl_slist* headers = nullptr; - headers = curl_slist_append(headers, "Content-Type: application/json"); - for (const auto& h: getExtraHeaders()) { - headers = curl_slist_append(headers, h.c_str()); + + // Determine URL based on request type + std::string url; + bool is_transcription = (request_type == RequestType::Transcription); + bool is_completion = (request_type == RequestType::Completion); + if 
(is_transcription) { + url = getTranscriptionUrl(); + } else if (is_completion) { + url = getCompletionUrl(); + } else { + url = getEmbedUrl(); } - auto url = is_completion ? getCompletionUrl() : getEmbedUrl(); + + // Prepare all requests for (size_t i = 0; i < jsons.size(); ++i) { - requests[i].payload = jsons[i].dump(); requests[i].easy = curl_easy_init(); curl_easy_setopt(requests[i].easy, CURLOPT_URL, url.c_str()); - curl_easy_setopt(requests[i].easy, CURLOPT_HTTPHEADER, headers); - curl_easy_setopt(requests[i].easy, CURLOPT_WRITEFUNCTION, +[](char* ptr, size_t size, size_t nmemb, void* userdata) -> size_t { + + if (is_transcription) { + // Handle transcription requests (multipart/form-data) + const auto& req = jsons[i]; + auto file_path = req["file_path"].get(); + auto model = req["model"].get(); + auto prompt = req.contains("prompt") ? req["prompt"].get() : ""; + requests[i].is_temp_file = req.contains("is_temp_file") ? req["is_temp_file"].get() : false; + if (requests[i].is_temp_file) { + requests[i].temp_file_path = file_path; + } + + // Set up multipart form data + requests[i].mime_form = curl_mime_init(requests[i].easy); + curl_mimepart* field = curl_mime_addpart(requests[i].mime_form); + curl_mime_name(field, "file"); + curl_mime_filedata(field, file_path.c_str()); + + field = curl_mime_addpart(requests[i].mime_form); + curl_mime_name(field, "model"); + curl_mime_data(field, model.c_str(), CURL_ZERO_TERMINATED); + + field = curl_mime_addpart(requests[i].mime_form); + curl_mime_name(field, "response_format"); + curl_mime_data(field, "json", CURL_ZERO_TERMINATED); + + if (!prompt.empty()) { + field = curl_mime_addpart(requests[i].mime_form); + curl_mime_name(field, "prompt"); + curl_mime_data(field, prompt.c_str(), CURL_ZERO_TERMINATED); + } + + curl_easy_setopt(requests[i].easy, CURLOPT_MIMEPOST, requests[i].mime_form); + + // Set headers + struct curl_slist* headers = nullptr; + headers = curl_slist_append(headers, "Expect:"); + for (const auto& h: 
getExtraHeaders()) { + headers = curl_slist_append(headers, h.c_str()); + } + curl_easy_setopt(requests[i].easy, CURLOPT_HTTPHEADER, headers); + } else { + // Handle JSON requests (completions/embeddings) + requests[i].payload = jsons[i].dump(); + struct curl_slist* headers = nullptr; + headers = curl_slist_append(headers, "Content-Type: application/json"); + for (const auto& h: getExtraHeaders()) { + headers = curl_slist_append(headers, h.c_str()); + } + curl_easy_setopt(requests[i].easy, CURLOPT_HTTPHEADER, headers); + curl_easy_setopt(requests[i].easy, CURLOPT_POST, 1L); + curl_easy_setopt(requests[i].easy, CURLOPT_POSTFIELDS, requests[i].payload.c_str()); + } + + // Set response callback + curl_easy_setopt( + requests[i].easy, CURLOPT_WRITEFUNCTION, +[](char* ptr, size_t size, size_t nmemb, void* userdata) -> size_t { std::string* resp = static_cast(userdata); resp->append(ptr, size * nmemb); return size * nmemb; }); curl_easy_setopt(requests[i].easy, CURLOPT_WRITEDATA, &requests[i].response); - curl_easy_setopt(requests[i].easy, CURLOPT_POST, 1L); - curl_easy_setopt(requests[i].easy, CURLOPT_POSTFIELDS, requests[i].payload.c_str()); + curl_multi_add_handle(multi_handle, requests[i].easy); } @@ -80,38 +178,49 @@ class BaseModelProviderHandler : public IModelProviderHandler { std::vector results(jsons.size()); for (size_t i = 0; i < requests.size(); ++i) { + // Clean up temp files for transcriptions + if (is_transcription && requests[i].is_temp_file && !requests[i].temp_file_path.empty()) { + std::remove(requests[i].temp_file_path.c_str()); + } + curl_easy_getinfo(requests[i].easy, CURLINFO_RESPONSE_CODE, NULL); - if (!requests[i].response.empty() && isJson(requests[i].response)) { + if (isJson(requests[i].response)) { try { nlohmann::json parsed = nlohmann::json::parse(requests[i].response); - checkResponse(parsed, is_completion); - - auto [input_tokens, output_tokens] = ExtractTokenUsage(parsed); - batch_input_tokens += input_tokens; - batch_output_tokens += 
output_tokens; + checkResponse(parsed, request_type); - if (is_completion) { - results[i] = ExtractCompletionOutput(parsed); - } else { - results[i] = ExtractEmbeddingVector(parsed); + // Extract token usage for completions/embeddings + if (!is_transcription) { + auto [input_tokens, output_tokens] = ExtractTokenUsage(parsed); + batch_input_tokens += input_tokens; + batch_output_tokens += output_tokens; } + + // Let provider extract output based on request type + results[i] = ExtractOutput(parsed, request_type); } catch (const std::exception& e) { - trigger_error(std::string("JSON parse error: ") + e.what()); + trigger_error(std::string("Response processing error: ") + e.what()); } } else { - trigger_error("Empty or invalid response in batch"); + trigger_error("Invalid JSON response: " + requests[i].response); + } + + // Clean up mime form for transcriptions + if (is_transcription && requests[i].mime_form) { + curl_mime_free(requests[i].mime_form); } curl_multi_remove_handle(multi_handle, requests[i].easy); curl_easy_cleanup(requests[i].easy); } - MetricsManager::UpdateTokens(batch_input_tokens, batch_output_tokens); + if (!is_transcription) { + MetricsManager::UpdateTokens(batch_input_tokens, batch_output_tokens); + } MetricsManager::AddApiDuration(api_duration_ms); for (size_t i = 0; i < jsons.size(); ++i) { MetricsManager::IncrementApiCalls(); } - curl_slist_free_all(headers); curl_multi_cleanup(multi_handle); return results; } @@ -122,14 +231,28 @@ class BaseModelProviderHandler : public IModelProviderHandler { protected: bool _throw_exception; std::vector _request_batch; + std::vector _request_types; virtual std::string getCompletionUrl() const = 0; virtual std::string getEmbedUrl() const = 0; + virtual std::string getTranscriptionUrl() const = 0; virtual void prepareSessionForRequest(const std::string& url) = 0; virtual std::vector getExtraHeaders() const { return {}; } - virtual void checkProviderSpecificResponse(const nlohmann::json&, bool is_completion) {} 
+ virtual void checkProviderSpecificResponse(const nlohmann::json&, RequestType request_type) {} virtual nlohmann::json ExtractCompletionOutput(const nlohmann::json&) const { return {}; } virtual nlohmann::json ExtractEmbeddingVector(const nlohmann::json&) const { return {}; } + virtual nlohmann::json ExtractTranscriptionOutput(const nlohmann::json&) const = 0; + + // Unified extraction method - delegates to specific Extract* methods based on request type + nlohmann::json ExtractOutput(const nlohmann::json& parsed, RequestType request_type) const { + if (request_type == RequestType::Completion) { + return ExtractCompletionOutput(parsed); + } else if (request_type == RequestType::Embedding) { + return ExtractEmbeddingVector(parsed); + } else { + return ExtractTranscriptionOutput(parsed); + } + } virtual std::pair ExtractTokenUsage(const nlohmann::json& response) const = 0; void trigger_error(const std::string& msg) { @@ -140,14 +263,14 @@ class BaseModelProviderHandler : public IModelProviderHandler { } } - void checkResponse(const nlohmann::json& json, bool is_completion) { + void checkResponse(const nlohmann::json& json, RequestType request_type) { if (json.contains("error")) { auto reason = json["error"].dump(); trigger_error(reason); std::cerr << ">> response error :\n" << json.dump(2) << "\n"; } - checkProviderSpecificResponse(json, is_completion); + checkProviderSpecificResponse(json, request_type); } bool isJson(const std::string& data) { diff --git a/src/include/flock/model_manager/providers/handlers/handler.hpp b/src/include/flock/model_manager/providers/handlers/handler.hpp index 51fe282a..11540de5 100644 --- a/src/include/flock/model_manager/providers/handlers/handler.hpp +++ b/src/include/flock/model_manager/providers/handlers/handler.hpp @@ -8,16 +8,19 @@ namespace flock { class IModelProviderHandler { public: enum class RequestType { Completion, - Embedding }; + Embedding, + Transcription }; virtual ~IModelProviderHandler() = default; - // AddRequest: 
type distinguishes between completion and embedding (default: Completion) + // AddRequest: type distinguishes between completion, embedding, and transcription (default: Completion) virtual void AddRequest(const nlohmann::json& json, RequestType type = RequestType::Completion) = 0; // CollectCompletions: process all as completions, then clear virtual std::vector CollectCompletions(const std::string& contentType = "application/json") = 0; // CollectEmbeddings: process all as embeddings, then clear virtual std::vector CollectEmbeddings(const std::string& contentType = "application/json") = 0; + // CollectTranscriptions: process all transcriptions, then clear + virtual std::vector CollectTranscriptions(const std::string& contentType = "multipart/form-data") = 0; }; }// namespace flock From 42264ebbe7d36a9e8ad9e5b6540fcf524029beb8 Mon Sep 17 00:00:00 2001 From: Anas Dorbani Date: Wed, 10 Dec 2025 11:38:26 -0500 Subject: [PATCH 23/59] Implemented ExtractTranscriptionOutput for OpenAI, Azure, and Ollama handlers --- .../providers/handlers/azure.hpp | 20 ++++++++++++++++++- .../providers/handlers/ollama.hpp | 13 +++++++++++- .../providers/handlers/openai.hpp | 18 ++++++++++++++++- 3 files changed, 48 insertions(+), 3 deletions(-) diff --git a/src/include/flock/model_manager/providers/handlers/azure.hpp b/src/include/flock/model_manager/providers/handlers/azure.hpp index 26d8ac41..3fa34efa 100644 --- a/src/include/flock/model_manager/providers/handlers/azure.hpp +++ b/src/include/flock/model_manager/providers/handlers/azure.hpp @@ -18,7 +18,11 @@ class AzureModelManager : public BaseModelProviderHandler { AzureModelManager& operator=(AzureModelManager&&) = delete; protected: - void checkProviderSpecificResponse(const nlohmann::json& response, bool is_completion) override { + void checkProviderSpecificResponse(const nlohmann::json& response, RequestType request_type) override { + if (request_type == RequestType::Transcription) { + return;// No specific checks needed for 
transcriptions + } + bool is_completion = (request_type == RequestType::Completion); if (is_completion) { if (response.contains("choices") && response["choices"].is_array() && !response["choices"].empty()) { const auto& choice = response["choices"][0]; @@ -43,6 +47,10 @@ class AzureModelManager : public BaseModelProviderHandler { return "https://" + _resource_name + ".openai.azure.com/openai/deployments/" + _deployment_model_name + "/embeddings?api-version=" + _api_version; } + std::string getTranscriptionUrl() const override { + return "https://" + _resource_name + ".openai.azure.com/openai/deployments/" + + _deployment_model_name + "/audio/transcriptions?api-version=" + _api_version; + } void prepareSessionForRequest(const std::string& url) override { _session.setUrl(url); } @@ -80,6 +88,16 @@ class AzureModelManager : public BaseModelProviderHandler { return {input_tokens, output_tokens}; } + + nlohmann::json ExtractTranscriptionOutput(const nlohmann::json& response) const override { + // Transcription API returns JSON with "text" field when response_format=json + if (response.contains("text")) { + return response["text"].get(); + } + return ""; + } + + std::string _token; std::string _resource_name; std::string _deployment_model_name; diff --git a/src/include/flock/model_manager/providers/handlers/ollama.hpp b/src/include/flock/model_manager/providers/handlers/ollama.hpp index aae9a26c..27cf9f6d 100644 --- a/src/include/flock/model_manager/providers/handlers/ollama.hpp +++ b/src/include/flock/model_manager/providers/handlers/ollama.hpp @@ -20,6 +20,7 @@ class OllamaModelManager : public BaseModelProviderHandler { protected: std::string getCompletionUrl() const override { return _url + "/api/generate"; } std::string getEmbedUrl() const override { return _url + "/api/embed"; } + std::string getTranscriptionUrl() const override { return ""; } void prepareSessionForRequest(const std::string& url) override { _session.setUrl(url); } void setParameters(const 
std::string& data, const std::string& contentType = "") override { if (contentType != "multipart/form-data") { @@ -29,7 +30,11 @@ class OllamaModelManager : public BaseModelProviderHandler { auto postRequest(const std::string& contentType) -> decltype(((Session*) nullptr)->postPrepareOllama(contentType)) override { return _session.postPrepareOllama(contentType); } - void checkProviderSpecificResponse(const nlohmann::json& response, bool is_completion) override { + void checkProviderSpecificResponse(const nlohmann::json& response, RequestType request_type) override { + if (request_type == RequestType::Transcription) { + return;// No specific checks needed for transcriptions + } + bool is_completion = (request_type == RequestType::Completion); if (is_completion) { if ((response.contains("done_reason") && response["done_reason"] != "stop") || (response.contains("done") && !response["done"].is_null() && response["done"].get() != true)) { @@ -68,6 +73,12 @@ class OllamaModelManager : public BaseModelProviderHandler { return {input_tokens, output_tokens}; } + + nlohmann::json ExtractTranscriptionOutput(const nlohmann::json& response) const override { + throw std::runtime_error("Audio transcription is not supported for Ollama provider, use Azure or OpenAI instead."); + } + + Session _session; std::string _url; }; diff --git a/src/include/flock/model_manager/providers/handlers/openai.hpp b/src/include/flock/model_manager/providers/handlers/openai.hpp index 44895778..064ba08c 100644 --- a/src/include/flock/model_manager/providers/handlers/openai.hpp +++ b/src/include/flock/model_manager/providers/handlers/openai.hpp @@ -35,6 +35,9 @@ class OpenAIModelManager : public BaseModelProviderHandler { std::string getEmbedUrl() const override { return _api_base_url + "embeddings"; } + std::string getTranscriptionUrl() const override { + return _api_base_url + "audio/transcriptions"; + } void prepareSessionForRequest(const std::string& url) override { _session.setUrl(url); } @@ -49,7 
+52,11 @@ class OpenAIModelManager : public BaseModelProviderHandler { std::vector getExtraHeaders() const override { return {"Authorization: Bearer " + _token}; } - void checkProviderSpecificResponse(const nlohmann::json& response, bool is_completion) override { + void checkProviderSpecificResponse(const nlohmann::json& response, RequestType request_type) override { + if (request_type == RequestType::Transcription) { + return;// No specific checks needed for transcriptions + } + bool is_completion = (request_type == RequestType::Completion); if (is_completion) { if (response.contains("choices") && response["choices"].is_array() && !response["choices"].empty()) { const auto& choice = response["choices"][0]; @@ -101,6 +108,15 @@ class OpenAIModelManager : public BaseModelProviderHandler { } return {input_tokens, output_tokens}; } + + + nlohmann::json ExtractTranscriptionOutput(const nlohmann::json& response) const override { + // Transcription API returns JSON with "text" field when response_format=json + if (response.contains("text")) { + return response["text"].get(); + } + return ""; + } }; }// namespace flock From 17a619223d484fe421689d9689b9a8800f8547c1 Mon Sep 17 00:00:00 2001 From: Anas Dorbani Date: Wed, 10 Dec 2025 11:38:34 -0500 Subject: [PATCH 24/59] Added AddTranscriptionRequest implementation for OpenAI, Azure, and Ollama providers --- .../providers/adapters/azure.hpp | 1 + .../providers/adapters/ollama.hpp | 1 + .../providers/adapters/openai.hpp | 1 + .../providers/adapters/azure.cpp | 42 ++++++++++++++----- .../providers/adapters/ollama.cpp | 18 +++++--- .../providers/adapters/openai.cpp | 42 ++++++++++++++----- 6 files changed, 80 insertions(+), 25 deletions(-) diff --git a/src/include/flock/model_manager/providers/adapters/azure.hpp b/src/include/flock/model_manager/providers/adapters/azure.hpp index a4493d82..0a93a231 100644 --- a/src/include/flock/model_manager/providers/adapters/azure.hpp +++ 
b/src/include/flock/model_manager/providers/adapters/azure.hpp @@ -15,6 +15,7 @@ class AzureProvider : public IProvider { void AddCompletionRequest(const std::string& prompt, const int num_output_tuples, OutputType output_type, const nlohmann::json& media_data) override; void AddEmbeddingRequest(const std::vector& inputs) override; + void AddTranscriptionRequest(const nlohmann::json& audio_files) override; }; }// namespace flock diff --git a/src/include/flock/model_manager/providers/adapters/ollama.hpp b/src/include/flock/model_manager/providers/adapters/ollama.hpp index 7f0c62c8..0ed7d44d 100644 --- a/src/include/flock/model_manager/providers/adapters/ollama.hpp +++ b/src/include/flock/model_manager/providers/adapters/ollama.hpp @@ -13,6 +13,7 @@ class OllamaProvider : public IProvider { void AddCompletionRequest(const std::string& prompt, const int num_output_tuples, OutputType output_type, const nlohmann::json& media_data) override; void AddEmbeddingRequest(const std::vector& inputs) override; + void AddTranscriptionRequest(const nlohmann::json& audio_files) override; }; }// namespace flock diff --git a/src/include/flock/model_manager/providers/adapters/openai.hpp b/src/include/flock/model_manager/providers/adapters/openai.hpp index a9d104c8..9b416c44 100644 --- a/src/include/flock/model_manager/providers/adapters/openai.hpp +++ b/src/include/flock/model_manager/providers/adapters/openai.hpp @@ -18,6 +18,7 @@ class OpenAIProvider : public IProvider { void AddCompletionRequest(const std::string& prompt, const int num_output_tuples, OutputType output_type, const nlohmann::json& media_data) override; void AddEmbeddingRequest(const std::vector& inputs) override; + void AddTranscriptionRequest(const nlohmann::json& audio_files) override; }; }// namespace flock diff --git a/src/model_manager/providers/adapters/azure.cpp b/src/model_manager/providers/adapters/azure.cpp index 7a883f27..753076e9 100644 --- a/src/model_manager/providers/adapters/azure.cpp +++ 
b/src/model_manager/providers/adapters/azure.cpp @@ -1,4 +1,6 @@ #include "flock/model_manager/providers/adapters/azure.hpp" +#include "flock/model_manager/model.hpp" +#include "flock/model_manager/providers/handlers/url_handler.hpp" namespace flock { @@ -8,17 +10,22 @@ void AzureProvider::AddCompletionRequest(const std::string& prompt, const int nu message_content.push_back({{"type", "text"}, {"text", prompt}}); - if (!media_data.empty()) { - auto detail = media_data[0].contains("detail") ? media_data[0]["detail"].get() : "low"; - auto image_type = media_data[0]["type"].get(); - auto mime_type = std::string("image/"); - if (size_t pos = image_type.find("/"); pos != std::string::npos) { - mime_type += image_type.substr(pos + 1); - } else { - mime_type += std::string("png"); - } + // Process image columns + if (media_data.contains("image") && !media_data["image"].empty() && media_data["image"].is_array()) { + std::string detail = "low"; auto column_index = 1u; - for (const auto& column: media_data) { + for (const auto& column: media_data["image"]) { + // Process image column as before + if (column_index == 1) { + detail = column.contains("detail") ? column["detail"].get() : "low"; + } + auto image_type = column.contains("type") ? 
column["type"].get() : "image"; + auto mime_type = std::string("image/"); + if (size_t pos = image_type.find("/"); pos != std::string::npos) { + mime_type += image_type.substr(pos + 1); + } else { + mime_type += std::string("png"); + } message_content.push_back( {{"type", "text"}, {"text", "ATTACHMENT COLUMN"}}); @@ -83,4 +90,19 @@ void AzureProvider::AddEmbeddingRequest(const std::vector& inputs) } } +void AzureProvider::AddTranscriptionRequest(const nlohmann::json& audio_files) { + for (const auto& audio_file: audio_files) { + auto audio_file_str = audio_file.get(); + + // Handle file download and validation + auto file_result = URLHandler::ResolveFilePath(audio_file_str); + + nlohmann::json transcription_request = { + {"file_path", file_result.file_path}, + {"model", model_details_.model}, + {"is_temp_file", file_result.is_temp_file}}; + model_handler_->AddRequest(transcription_request, IModelProviderHandler::RequestType::Transcription); + } +} + }// namespace flock \ No newline at end of file diff --git a/src/model_manager/providers/adapters/ollama.cpp b/src/model_manager/providers/adapters/ollama.cpp index 2e5cd2a1..07b619d8 100644 --- a/src/model_manager/providers/adapters/ollama.cpp +++ b/src/model_manager/providers/adapters/ollama.cpp @@ -1,4 +1,5 @@ #include "flock/model_manager/providers/adapters/ollama.hpp" +#include "flock/model_manager/providers/handlers/url_handler.hpp" namespace flock { @@ -8,11 +9,14 @@ void OllamaProvider::AddCompletionRequest(const std::string& prompt, const int n {"stream", false}}; auto images = nlohmann::json::array(); - if (!media_data.empty()) { - for (const auto& column: media_data) { - for (const auto& image: column["data"]) { - auto image_str = image.get(); - images.push_back(image_str); + // Process image columns + if (media_data.contains("image") && !media_data["image"].empty() && media_data["image"].is_array()) { + for (const auto& column: media_data["image"]) { + if (column.contains("data") && 
column["data"].is_array()) { + for (const auto& image: column["data"]) { + auto image_str = image.get(); + images.push_back(image_str); + } } } } @@ -51,4 +55,8 @@ void OllamaProvider::AddEmbeddingRequest(const std::vector& inputs) } } +void OllamaProvider::AddTranscriptionRequest(const nlohmann::json& audio_files) { + throw std::runtime_error("Audio transcription is not currently supported by Ollama."); +} + }// namespace flock \ No newline at end of file diff --git a/src/model_manager/providers/adapters/openai.cpp b/src/model_manager/providers/adapters/openai.cpp index de1a2c0c..1eb14fc5 100644 --- a/src/model_manager/providers/adapters/openai.cpp +++ b/src/model_manager/providers/adapters/openai.cpp @@ -1,4 +1,6 @@ #include "flock/model_manager/providers/adapters/openai.hpp" +#include "flock/model_manager/model.hpp" +#include "flock/model_manager/providers/handlers/url_handler.hpp" #include namespace flock { @@ -8,17 +10,22 @@ void OpenAIProvider::AddCompletionRequest(const std::string& prompt, const int n message_content.push_back({{"type", "text"}, {"text", prompt}}); - if (!media_data.empty()) { - auto detail = media_data[0].contains("detail") ? media_data[0]["detail"].get() : "low"; - auto image_type = media_data[0]["type"].get(); - auto mime_type = std::string("image/"); - if (size_t pos = image_type.find("/"); pos != std::string::npos) { - mime_type += image_type.substr(pos + 1); - } else { - mime_type += std::string("png"); - } + // Process image columns + if (media_data.contains("image") && !media_data["image"].empty() && media_data["image"].is_array()) { + std::string detail = "low"; auto column_index = 1u; - for (const auto& column: media_data) { + for (const auto& column: media_data["image"]) { + // Process image column as before + if (column_index == 1) { + detail = column.contains("detail") ? column["detail"].get() : "low"; + } + auto image_type = column.contains("type") ? 
column["type"].get() : "image"; + auto mime_type = std::string("image/"); + if (size_t pos = image_type.find("/"); pos != std::string::npos) { + mime_type += image_type.substr(pos + 1); + } else { + mime_type += std::string("png"); + } message_content.push_back( {{"type", "text"}, {"text", "ATTACHMENT COLUMN"}}); @@ -81,4 +88,19 @@ void OpenAIProvider::AddEmbeddingRequest(const std::vector& inputs) model_handler_->AddRequest(request_payload, IModelProviderHandler::RequestType::Embedding); } +void OpenAIProvider::AddTranscriptionRequest(const nlohmann::json& audio_files) { + for (const auto& audio_file: audio_files) { + auto audio_file_str = audio_file.get(); + + // Handle file download and validation + auto file_result = URLHandler::ResolveFilePath(audio_file_str); + + nlohmann::json transcription_request = { + {"file_path", file_result.file_path}, + {"model", model_details_.model}, + {"is_temp_file", file_result.is_temp_file}}; + model_handler_->AddRequest(transcription_request, IModelProviderHandler::RequestType::Transcription); + } +} + }// namespace flock From c0adbc36ac4e758925bc753a88e71b6cabafbead Mon Sep 17 00:00:00 2001 From: Anas Dorbani Date: Wed, 10 Dec 2025 11:38:49 -0500 Subject: [PATCH 25/59] Added transcription request methods to IProvider interface and Model class --- src/include/flock/model_manager/model.hpp | 2 ++ src/include/flock/model_manager/providers/provider.hpp | 4 ++++ src/model_manager/model.cpp | 8 ++++++++ 3 files changed, 14 insertions(+) diff --git a/src/include/flock/model_manager/model.hpp b/src/include/flock/model_manager/model.hpp index ee3085b5..fc17acd0 100644 --- a/src/include/flock/model_manager/model.hpp +++ b/src/include/flock/model_manager/model.hpp @@ -26,8 +26,10 @@ class Model { explicit Model() = default; void AddCompletionRequest(const std::string& prompt, const int num_output_tuples, OutputType output_type = OutputType::STRING, const nlohmann::json& media_data = nlohmann::json::object()); void 
AddEmbeddingRequest(const std::vector& inputs); + void AddTranscriptionRequest(const nlohmann::json& audio_files); std::vector CollectCompletions(const std::string& contentType = "application/json"); std::vector CollectEmbeddings(const std::string& contentType = "application/json"); + std::vector CollectTranscriptions(const std::string& contentType = "multipart/form-data"); ModelDetails GetModelDetails(); static void SetMockProvider(const std::shared_ptr& mock_provider) { diff --git a/src/include/flock/model_manager/providers/provider.hpp b/src/include/flock/model_manager/providers/provider.hpp index 74c302a2..cedc57f2 100644 --- a/src/include/flock/model_manager/providers/provider.hpp +++ b/src/include/flock/model_manager/providers/provider.hpp @@ -29,6 +29,7 @@ class IProvider { virtual void AddCompletionRequest(const std::string& prompt, const int num_output_tuples, OutputType output_type, const nlohmann::json& media_data) = 0; virtual void AddEmbeddingRequest(const std::vector& inputs) = 0; + virtual void AddTranscriptionRequest(const nlohmann::json& audio_files) = 0; virtual std::vector CollectCompletions(const std::string& contentType = "application/json") { return model_handler_->CollectCompletions(contentType); @@ -36,6 +37,9 @@ class IProvider { virtual std::vector CollectEmbeddings(const std::string& contentType = "application/json") { return model_handler_->CollectEmbeddings(contentType); } + virtual std::vector CollectTranscriptions(const std::string& contentType = "multipart/form-data") { + return model_handler_->CollectTranscriptions(contentType); + } static std::string GetOutputTypeString(const OutputType output_type) { switch (output_type) { diff --git a/src/model_manager/model.cpp b/src/model_manager/model.cpp index 24a64dcc..3a150618 100644 --- a/src/model_manager/model.cpp +++ b/src/model_manager/model.cpp @@ -106,6 +106,10 @@ void Model::AddEmbeddingRequest(const std::vector& inputs) { provider_->AddEmbeddingRequest(inputs); } +void 
Model::AddTranscriptionRequest(const nlohmann::json& audio_files) { + provider_->AddTranscriptionRequest(audio_files); +} + std::vector Model::CollectCompletions(const std::string& contentType) { return provider_->CollectCompletions(contentType); } @@ -114,4 +118,8 @@ std::vector Model::CollectEmbeddings(const std::string& contentT return provider_->CollectEmbeddings(contentType); } +std::vector Model::CollectTranscriptions(const std::string& contentType) { + return provider_->CollectTranscriptions(contentType); +} + }// namespace flock From f4be1e29700f1c02a49c1917eada82236619903a Mon Sep 17 00:00:00 2001 From: Anas Dorbani Date: Wed, 10 Dec 2025 11:39:32 -0500 Subject: [PATCH 26/59] Added audio transcription support to prompt manager and input parser --- src/core/config/model.cpp | 2 + src/functions/input_parser.cpp | 45 ++++++++++++++++--- .../flock/prompt_manager/prompt_manager.hpp | 29 ++++++++++-- src/prompt_manager/prompt_manager.cpp | 31 +++++++++++++ 4 files changed, 97 insertions(+), 10 deletions(-) diff --git a/src/core/config/model.cpp b/src/core/config/model.cpp index b08e5e92..310b08f4 100644 --- a/src/core/config/model.cpp +++ b/src/core/config/model.cpp @@ -32,6 +32,8 @@ void Config::SetupDefaultModelsConfig(duckdb::Connection& con, std::string& sche "('default', 'gpt-4o-mini', 'openai'), " "('gpt-4o-mini', 'gpt-4o-mini', 'openai'), " "('gpt-4o', 'gpt-4o', 'openai'), " + "('gpt-4o-transcribe', 'gpt-4o-transcribe', 'openai')," + "('gpt-4o-mini-transcribe', 'gpt-4o-mini-transcribe', 'openai')," "('text-embedding-3-large', 'text-embedding-3-large', 'openai'), " "('text-embedding-3-small', 'text-embedding-3-small', 'openai');", schema_name, table_name)); diff --git a/src/functions/input_parser.cpp b/src/functions/input_parser.cpp index 9b1e23a5..e5e304da 100644 --- a/src/functions/input_parser.cpp +++ b/src/functions/input_parser.cpp @@ -4,6 +4,40 @@ namespace flock { +// Helper function to validate and clean context column, handling NULL values +static 
void ValidateAndCleanContextColumn(nlohmann::json& column, const std::initializer_list& allowed_keys) { + std::string column_type = ""; + bool has_type = false; + bool has_transcription_model = false; + + for (const auto& key: allowed_keys) { + if (key != std::string("data")) { + bool key_exists = column.contains(key); + bool is_null = key_exists && column[key].get() == "NULL"; + + if (key == std::string("type") && key_exists && !is_null) { + column_type = column[key].get(); + has_type = true; + } else if (key == std::string("transcription_model") && key_exists && !is_null) { + has_transcription_model = true; + } else if (!key_exists || is_null) { + column.erase(key); + } + } + } + + // Validate transcription_model is only used with audio type + if (has_transcription_model && column_type != "audio") { + std::string type_display = has_type ? column_type : "tabular"; + throw std::runtime_error(duckdb_fmt::format("Argument 'transcription_model' is not supported for data type '{}'. It can only be used with type 'audio'.", type_display)); + } + + // Validate that audio type requires transcription_model + if (has_type && column_type == "audio" && !has_transcription_model) { + throw std::runtime_error("Argument 'transcription_model' is required when type is 'audio'."); + } +} + nlohmann::json CastVectorOfStructsToJson(const duckdb::Vector& struct_vector, const int size) { nlohmann::json struct_json; @@ -20,28 +54,25 @@ nlohmann::json CastVectorOfStructsToJson(const duckdb::Vector& struct_vector, co for (auto context_column_idx = 0; context_column_idx < static_cast(context_columns.size()); context_column_idx++) { auto context_column = context_columns[context_column_idx]; auto context_column_json = CastVectorOfStructsToJson(duckdb::Vector(context_column), 1); - auto allowed_keys = {"name", "data", "type", "detail"}; + auto allowed_keys = {"name", "data", "type", "detail", "transcription_model"}; for (const auto& key: context_column_json.items()) { if 
(std::find(std::begin(allowed_keys), std::end(allowed_keys), key.key()) == std::end(allowed_keys)) { throw std::runtime_error(duckdb_fmt::format("Unexpected key in 'context_columns': {}", key.key())); } } + auto required_keys = {"data"}; for (const auto& key: required_keys) { if (!context_column_json.contains(key) || (key != "data" && context_column_json[key].get() == "NULL")) { throw std::runtime_error(duckdb_fmt::format("Expected 'context_columns' to contain key: {}", key)); } } + if (struct_json.contains("context_columns") && struct_json["context_columns"].size() == context_columns.size()) { struct_json["context_columns"][context_column_idx]["data"].push_back(context_column_json["data"]); } else { struct_json["context_columns"].push_back(context_column_json); - for (const auto& key: allowed_keys) { - if (key != "data" && (!struct_json["context_columns"][context_column_idx].contains(key) || - struct_json["context_columns"][context_column_idx][key].get() == "NULL")) { - struct_json["context_columns"][context_column_idx].erase(key); - } - } + ValidateAndCleanContextColumn(struct_json["context_columns"][context_column_idx], allowed_keys); struct_json["context_columns"][context_column_idx]["data"] = nlohmann::json::array(); struct_json["context_columns"][context_column_idx]["data"].push_back(context_column_json["data"]); } diff --git a/src/include/flock/prompt_manager/prompt_manager.hpp b/src/include/flock/prompt_manager/prompt_manager.hpp index 9d3e203e..9f46b6b6 100644 --- a/src/include/flock/prompt_manager/prompt_manager.hpp +++ b/src/include/flock/prompt_manager/prompt_manager.hpp @@ -4,6 +4,7 @@ #include "flock/core/common.hpp" #include "flock/core/config.hpp" +#include "flock/model_manager/model.hpp" #include "flock/prompt_manager/repository.hpp" #include @@ -45,19 +46,41 @@ class PromptManager { static std::string ConstructInputTuples(const nlohmann::json& columns, const std::string& tuple_format = "XML"); +private: + // Helper function to transcribe audio 
column and create transcription text column + static nlohmann::json TranscribeAudioColumn(const nlohmann::json& audio_column); + +public: template static std::tuple Render(const std::string& user_prompt, const nlohmann::json& columns, FunctionType option, const std::string& tuple_format = "XML") { - auto media_data = nlohmann::json::array(); + auto image_data = nlohmann::json::array(); auto tabular_data = nlohmann::json::array(); + for (auto i = 0; i < static_cast(columns.size()); i++) { - if (columns[i].contains("type") && columns[i]["type"] == "image") { - media_data.push_back(columns[i]); + if (columns[i].contains("type")) { + auto column_type = columns[i]["type"].get(); + if (column_type == "image") { + image_data.push_back(columns[i]); + } else if (column_type == "audio") { + // Transcribe audio and merge as tabular text data + if (columns[i].contains("transcription_model")) { + auto transcription_column = TranscribeAudioColumn(columns[i]); + tabular_data.push_back(transcription_column); + } + } else { + tabular_data.push_back(columns[i]); + } } else { tabular_data.push_back(columns[i]); } } + // Create media_data as an object with only image array (audio is now in tabular_data) + nlohmann::json media_data; + media_data["image"] = image_data; + media_data["audio"] = nlohmann::json::array();// Empty - audio is now in tabular_data + auto prompt = PromptManager::GetTemplate(option); prompt = PromptManager::ReplaceSection(prompt, PromptSection::USER_PROMPT, user_prompt); if (!tabular_data.empty()) { diff --git a/src/prompt_manager/prompt_manager.cpp b/src/prompt_manager/prompt_manager.cpp index 4d497d4a..477d8a7a 100644 --- a/src/prompt_manager/prompt_manager.cpp +++ b/src/prompt_manager/prompt_manager.cpp @@ -216,4 +216,35 @@ PromptDetails PromptManager::CreatePromptDetails(const nlohmann::json& prompt_de } return prompt_details; } + +nlohmann::json PromptManager::TranscribeAudioColumn(const nlohmann::json& audio_column) { + auto transcription_model_name = 
audio_column["transcription_model"].get(); + + // Look up the transcription model + nlohmann::json transcription_model_json; + transcription_model_json["model_name"] = transcription_model_name; + Model transcription_model(transcription_model_json); + + // Add transcription requests to batch + transcription_model.AddTranscriptionRequest(audio_column["data"]); + + // Collect transcriptions + auto transcription_results = transcription_model.CollectTranscriptions(); + + // Convert vector to nlohmann::json array + nlohmann::json transcriptions = nlohmann::json::array(); + for (const auto& result: transcription_results) { + transcriptions.push_back(result); + } + + // Create transcription column with proper naming + auto transcription_column = nlohmann::json::object(); + auto original_name = audio_column.contains("name") ? audio_column["name"].get() : ""; + auto transcription_name = original_name.empty() ? "transcription" : "transcription_of_" + original_name; + transcription_column["name"] = transcription_name; + transcription_column["data"] = transcriptions; + + return transcription_column; +} + }// namespace flock From 1b280c5f22579f5339d8226dfad95f552408a228 Mon Sep 17 00:00:00 2001 From: Anas Dorbani Date: Wed, 10 Dec 2025 11:39:39 -0500 Subject: [PATCH 27/59] Added transcription mock methods and OLLAMA secret to test base classes --- .../functions/aggregate/llm_aggregate_function_test_base.hpp | 3 +++ test/unit/functions/mock_provider.hpp | 2 ++ .../functions/scalar/llm_function_test_base_instantiations.cpp | 3 +++ 3 files changed, 8 insertions(+) diff --git a/test/unit/functions/aggregate/llm_aggregate_function_test_base.hpp b/test/unit/functions/aggregate/llm_aggregate_function_test_base.hpp index ffaffd7d..0f75c0bc 100644 --- a/test/unit/functions/aggregate/llm_aggregate_function_test_base.hpp +++ b/test/unit/functions/aggregate/llm_aggregate_function_test_base.hpp @@ -27,6 +27,9 @@ class LLMAggregateTestBase : public ::testing::Test { con.Query(" CREATE SECRET 
(" " TYPE OPENAI," " API_KEY 'your-api-key');"); + con.Query(" CREATE SECRET (" + " TYPE OLLAMA," + " API_URL '127.0.0.1:11434');"); mock_provider = std::make_shared(ModelDetails{}); Model::SetMockProvider(mock_provider); diff --git a/test/unit/functions/mock_provider.hpp b/test/unit/functions/mock_provider.hpp index d53c90e9..4a0b6f8c 100644 --- a/test/unit/functions/mock_provider.hpp +++ b/test/unit/functions/mock_provider.hpp @@ -10,8 +10,10 @@ class MockProvider : public IProvider { MOCK_METHOD(void, AddCompletionRequest, (const std::string& prompt, const int num_output_tuples, OutputType output_type, const nlohmann::json& media_data), (override)); MOCK_METHOD(void, AddEmbeddingRequest, (const std::vector& inputs), (override)); + MOCK_METHOD(void, AddTranscriptionRequest, (const nlohmann::json& audio_files), (override)); MOCK_METHOD(std::vector, CollectCompletions, (const std::string& contentType), (override)); MOCK_METHOD(std::vector, CollectEmbeddings, (const std::string& contentType), (override)); + MOCK_METHOD(std::vector, CollectTranscriptions, (const std::string& contentType), (override)); }; }// namespace flock diff --git a/test/unit/functions/scalar/llm_function_test_base_instantiations.cpp b/test/unit/functions/scalar/llm_function_test_base_instantiations.cpp index 487ab323..6aaba0fa 100644 --- a/test/unit/functions/scalar/llm_function_test_base_instantiations.cpp +++ b/test/unit/functions/scalar/llm_function_test_base_instantiations.cpp @@ -12,6 +12,9 @@ void LLMFunctionTestBase::SetUp() { con.Query(" CREATE SECRET (" " TYPE OPENAI," " API_KEY 'your-api-key');"); + con.Query(" CREATE SECRET (" + " TYPE OLLAMA," + " API_URL '127.0.0.1:11434');"); mock_provider = std::make_shared(ModelDetails{}); Model::SetMockProvider(mock_provider); From 15537ffc40b2be3d6910640974f8059635374ed5 Mon Sep 17 00:00:00 2001 From: Anas Dorbani Date: Wed, 10 Dec 2025 11:39:49 -0500 Subject: [PATCH 28/59] Added unit tests for audio transcription in llm_complete and llm_filter 
--- test/unit/functions/scalar/llm_complete.cpp | 101 ++++++++++++++++++++ test/unit/functions/scalar/llm_filter.cpp | 86 +++++++++++++++++ 2 files changed, 187 insertions(+) diff --git a/test/unit/functions/scalar/llm_complete.cpp b/test/unit/functions/scalar/llm_complete.cpp index ccabea7f..ccc6f93b 100644 --- a/test/unit/functions/scalar/llm_complete.cpp +++ b/test/unit/functions/scalar/llm_complete.cpp @@ -161,4 +161,105 @@ TEST_F(LLMCompleteTest, Operation_LargeInputSet_ProcessesCorrectly) { } } +// Test llm_complete with audio transcription +TEST_F(LLMCompleteTest, LLMCompleteWithAudioTranscription) { + const nlohmann::json expected_transcription = "{\"text\": \"This is a transcribed audio\"}"; + const nlohmann::json expected_complete_response = {{"items", {"Based on the transcription: This is a transcribed audio"}}}; + + // Mock transcription model + EXPECT_CALL(*mock_provider, AddTranscriptionRequest(::testing::_)) + .Times(1); + EXPECT_CALL(*mock_provider, CollectTranscriptions("multipart/form-data")) + .WillOnce(::testing::Return(std::vector{expected_transcription})); + + // Mock completion model + EXPECT_CALL(*mock_provider, AddCompletionRequest(::testing::_, ::testing::_, ::testing::_, ::testing::_)) + .Times(1); + EXPECT_CALL(*mock_provider, CollectCompletions(::testing::_)) + .WillOnce(::testing::Return(std::vector{expected_complete_response})); + + auto con = Config::GetConnection(); + const auto results = con.Query( + "SELECT llm_complete(" + "{'model_name': 'gpt-4o'}, " + "{'prompt': 'Summarize this audio', " + "'context_columns': [" + "{'data': audio_url, " + "'type': 'audio', " + "'transcription_model': 'gpt-4o-transcribe'}" + "]}) AS result FROM VALUES ('https://example.com/audio.mp3') AS tbl(audio_url);"); + + ASSERT_FALSE(results->HasError()) << "Query failed: " << results->GetError(); + ASSERT_EQ(results->RowCount(), 1); +} + +// Test llm_complete with audio and text columns +TEST_F(LLMCompleteTest, LLMCompleteWithAudioAndText) { + const 
nlohmann::json expected_transcription = "{\"text\": \"Product audio description\"}"; + const nlohmann::json expected_complete_response = {{"items", {"Combined response"}}}; + + EXPECT_CALL(*mock_provider, AddTranscriptionRequest(::testing::_)) + .Times(1); + EXPECT_CALL(*mock_provider, CollectTranscriptions("multipart/form-data")) + .WillOnce(::testing::Return(std::vector{expected_transcription})); + + EXPECT_CALL(*mock_provider, AddCompletionRequest(::testing::_, ::testing::_, ::testing::_, ::testing::_)) + .Times(1); + EXPECT_CALL(*mock_provider, CollectCompletions(::testing::_)) + .WillOnce(::testing::Return(std::vector{expected_complete_response})); + + auto con = Config::GetConnection(); + const auto results = con.Query( + "SELECT llm_complete(" + "{'model_name': 'gpt-4o'}, " + "{'prompt': 'Describe this product', " + "'context_columns': [" + "{'data': product, 'name': 'product'}, " + "{'data': audio_url, " + "'type': 'audio', " + "'transcription_model': 'gpt-4o-transcribe'}" + "]}) AS result FROM VALUES ('Wireless Headphones', 'https://example.com/audio.mp3') AS tbl(product, audio_url);"); + + ASSERT_FALSE(results->HasError()) << "Query failed: " << results->GetError(); + ASSERT_EQ(results->RowCount(), 1); +} + +// Test audio transcription error handling +TEST_F(LLMCompleteTest, LLMCompleteAudioTranscriptionError) { + auto con = Config::GetConnection(); + // Mock transcription model to throw error (simulating Ollama behavior) + EXPECT_CALL(*mock_provider, AddTranscriptionRequest(::testing::_)) + .WillOnce(::testing::Throw(std::runtime_error("Audio transcription is not currently supported by Ollama."))); + + // Test with Ollama which doesn't support transcription + const auto results = con.Query( + "SELECT llm_complete(" + "{'model_name': 'llama3'}, " + "{'prompt': 'Summarize this audio', " + "'context_columns': [" + "{'data': audio_url, " + "'type': 'audio', " + "'transcription_model': 'llama3'}" + "]}) AS result FROM VALUES ('https://example.com/audio.mp3') 
AS tbl(audio_url);"); + + // Should fail because Ollama doesn't support transcription + ASSERT_TRUE(results->HasError()); +} + +// Test audio transcription with missing transcription_model +TEST_F(LLMCompleteTest, LLMCompleteAudioMissingTranscriptionModel) { + auto con = Config::GetConnection(); + const auto results = con.Query( + "SELECT llm_complete(" + "{'model_name': 'gpt-4o'}, " + "{'prompt': 'Summarize this audio', " + "'context_columns': [" + "{'data': audio_url, " + "'type': 'audio'}" + "]}) AS result FROM VALUES ('https://example.com/audio.mp3') AS tbl(audio_url);"); + + // Should fail because transcription_model is required for audio type + ASSERT_TRUE(results->HasError()); +} + }// namespace flock \ No newline at end of file diff --git a/test/unit/functions/scalar/llm_filter.cpp b/test/unit/functions/scalar/llm_filter.cpp index f7029ec8..ed822136 100644 --- a/test/unit/functions/scalar/llm_filter.cpp +++ b/test/unit/functions/scalar/llm_filter.cpp @@ -117,4 +117,90 @@ TEST_F(LLMFilterTest, Operation_LargeInputSet_ProcessesCorrectly) { } } +// Test llm_filter with audio transcription +TEST_F(LLMFilterTest, LLMFilterWithAudioTranscription) { + const nlohmann::json expected_transcription = "{\"text\": \"This audio contains positive sentiment\"}"; + const nlohmann::json expected_complete_response = {{"items", {true}}}; + + // Mock transcription model + EXPECT_CALL(*mock_provider, AddTranscriptionRequest(::testing::_)) + .Times(1); + EXPECT_CALL(*mock_provider, CollectTranscriptions("multipart/form-data")) + .WillOnce(::testing::Return(std::vector{expected_transcription})); + + // Mock completion model + EXPECT_CALL(*mock_provider, AddCompletionRequest(::testing::_, ::testing::_, ::testing::_, ::testing::_)) + .Times(1); + EXPECT_CALL(*mock_provider, CollectCompletions(::testing::_)) + .WillOnce(::testing::Return(std::vector{expected_complete_response})); + + auto con = Config::GetConnection(); + const auto results = con.Query( + "SELECT llm_filter(" + 
"{'model_name': 'gpt-4o'}, " + "{'prompt': 'Is the sentiment in this audio positive?', " + "'context_columns': [" + "{'data': audio_url, " + "'type': 'audio', " + "'transcription_model': 'gpt-4o-transcribe'}" + "]}) AS result FROM VALUES ('https://example.com/audio.mp3') AS tbl(audio_url);"); + + ASSERT_FALSE(results->HasError()) << "Query failed: " << results->GetError(); + ASSERT_EQ(results->RowCount(), 1); +} + +// Test llm_filter with audio and text columns +TEST_F(LLMFilterTest, LLMFilterWithAudioAndText) { + const nlohmann::json expected_transcription = "{\"text\": \"Product review audio\"}"; + const nlohmann::json expected_complete_response = {{"items", {true}}}; + + EXPECT_CALL(*mock_provider, AddTranscriptionRequest(::testing::_)) + .Times(1); + EXPECT_CALL(*mock_provider, CollectTranscriptions("multipart/form-data")) + .WillOnce(::testing::Return(std::vector{expected_transcription})); + + EXPECT_CALL(*mock_provider, AddCompletionRequest(::testing::_, ::testing::_, ::testing::_, ::testing::_)) + .Times(1); + EXPECT_CALL(*mock_provider, CollectCompletions(::testing::_)) + .WillOnce(::testing::Return(std::vector{expected_complete_response})); + + auto con = Config::GetConnection(); + const auto results = con.Query( + "SELECT llm_filter(" + "{'model_name': 'gpt-4o'}, " + "{'prompt': 'Is this product review positive?', " + "'context_columns': [" + "{'data': text_review, 'name': 'text_review'}, " + "{'data': audio_url, " + "'type': 'audio', " + "'transcription_model': 'gpt-4o-transcribe'}" + "]}) AS result FROM VALUES ('Great product', 'https://example.com/audio.mp3') AS tbl(text_review, audio_url);"); + + ASSERT_FALSE(results->HasError()) << "Query failed: " << results->GetError(); + ASSERT_EQ(results->RowCount(), 1); +} + +// Test audio transcription error handling for Ollama +TEST_F(LLMFilterTest, LLMFilterAudioTranscriptionOllamaError) { + auto con = Config::GetConnection(); + + // Mock transcription model to throw error (simulating Ollama behavior) + 
EXPECT_CALL(*mock_provider, AddTranscriptionRequest(::testing::_)) + .WillOnce(::testing::Throw(std::runtime_error("Audio transcription is not currently supported by Ollama."))); + + // Test with Ollama which doesn't support transcription + const auto results = con.Query( + "SELECT llm_filter(" + "{'model_name': 'llama3'}, " + "{'prompt': 'Is the sentiment positive?', " + "'context_columns': [" + "{'data': audio_url, " + "'type': 'audio', " + "'transcription_model': 'llama3'}" + "]}) AS result FROM VALUES ('https://example.com/audio.mp3') AS tbl(audio_url);"); + + // Should fail because Ollama doesn't support transcription + ASSERT_TRUE(results->HasError()); +} + }// namespace flock From 2a3da647c134d633fc29a65c36706c014b8e7e0c Mon Sep 17 00:00:00 2001 From: Anas Dorbani Date: Wed, 10 Dec 2025 11:39:55 -0500 Subject: [PATCH 29/59] Added unit tests for audio transcription in aggregate LLM functions --- test/unit/functions/aggregate/llm_first.cpp | 54 +++++++++++++ test/unit/functions/aggregate/llm_last.cpp | 54 +++++++++++++ test/unit/functions/aggregate/llm_reduce.cpp | 85 ++++++++++++++++++++ test/unit/functions/aggregate/llm_rerank.cpp | 59 ++++++++++++++ 4 files changed, 252 insertions(+) diff --git a/test/unit/functions/aggregate/llm_first.cpp b/test/unit/functions/aggregate/llm_first.cpp index 61f0ba89..5e86638c 100644 --- a/test/unit/functions/aggregate/llm_first.cpp +++ b/test/unit/functions/aggregate/llm_first.cpp @@ -160,4 +160,58 @@ TEST_F(LLMFirstTest, Operation_LargeInputSet_ProcessesCorrectly) { } } +// Test llm_first with audio transcription +TEST_F(LLMFirstTest, LLMFirstWithAudioTranscription) { + const nlohmann::json expected_transcription = "{\"text\": \"First audio candidate\"}"; + const nlohmann::json expected_complete_response = GetExpectedJsonResponse(); + + // Mock transcription model + EXPECT_CALL(*mock_provider, AddTranscriptionRequest(::testing::_)) + .Times(1); + EXPECT_CALL(*mock_provider, CollectTranscriptions("multipart/form-data")) + 
.WillOnce(::testing::Return(std::vector{expected_transcription})); + + // Mock completion model + EXPECT_CALL(*mock_provider, AddCompletionRequest(::testing::_, ::testing::_, ::testing::_, ::testing::_)) + .Times(1); + EXPECT_CALL(*mock_provider, CollectCompletions(::testing::_)) + .WillOnce(::testing::Return(std::vector{expected_complete_response})); + + auto con = Config::GetConnection(); + const auto results = con.Query( + "SELECT llm_first(" + "{'model_name': 'gpt-4o'}, " + "{'prompt': 'Select the best audio candidate. Return ID 0.', " + "'context_columns': [" + "{'data': audio_url, " + "'type': 'audio', " + "'transcription_model': 'gpt-4o-transcribe'}" + "]}) AS result FROM VALUES ('https://example.com/audio.mp3') AS tbl(audio_url);"); + + ASSERT_FALSE(results->HasError()) << "Query failed: " << results->GetError(); + ASSERT_EQ(results->RowCount(), 1); +} + +// Test audio transcription error handling for Ollama +TEST_F(LLMFirstTest, LLMFirstAudioTranscriptionOllamaError) { + auto con = Config::GetConnection(); + // Mock transcription model to throw error (simulating Ollama behavior) + EXPECT_CALL(*mock_provider, AddTranscriptionRequest(::testing::_)) + .WillOnce(::testing::Throw(std::runtime_error("Audio transcription is not currently supported by Ollama."))); + + // Test with Ollama which doesn't support transcription + const auto results = con.Query( + "SELECT llm_first(" + "{'model_name': 'llama3'}, " + "{'prompt': 'Select the best audio. 
Return ID 0.', " + "'context_columns': [" + "{'data': audio_url, " + "'type': 'audio', " + "'transcription_model': 'llama3'}" + "]}) AS result FROM VALUES ('https://example.com/audio.mp3') AS tbl(audio_url);"); + + // Should fail because Ollama doesn't support transcription + ASSERT_TRUE(results->HasError()); +} + }// namespace flock diff --git a/test/unit/functions/aggregate/llm_last.cpp b/test/unit/functions/aggregate/llm_last.cpp index bd09d450..988fd032 100644 --- a/test/unit/functions/aggregate/llm_last.cpp +++ b/test/unit/functions/aggregate/llm_last.cpp @@ -160,4 +160,58 @@ TEST_F(LLMLastTest, Operation_LargeInputSet_ProcessesCorrectly) { } } +// Test llm_last with audio transcription +TEST_F(LLMLastTest, LLMLastWithAudioTranscription) { + const nlohmann::json expected_transcription = "{\"text\": \"Last audio candidate\"}"; + const nlohmann::json expected_complete_response = GetExpectedJsonResponse(); + + // Mock transcription model + EXPECT_CALL(*mock_provider, AddTranscriptionRequest(::testing::_)) + .Times(1); + EXPECT_CALL(*mock_provider, CollectTranscriptions("multipart/form-data")) + .WillOnce(::testing::Return(std::vector{expected_transcription})); + + // Mock completion model + EXPECT_CALL(*mock_provider, AddCompletionRequest(::testing::_, ::testing::_, ::testing::_, ::testing::_)) + .Times(1); + EXPECT_CALL(*mock_provider, CollectCompletions(::testing::_)) + .WillOnce(::testing::Return(std::vector{expected_complete_response})); + + auto con = Config::GetConnection(); + const auto results = con.Query( + "SELECT llm_last(" + "{'model_name': 'gpt-4o'}, " + "{'prompt': 'Select the worst audio candidate. 
Return ID 0.', " + "'context_columns': [" + "{'data': audio_url, " + "'type': 'audio', " + "'transcription_model': 'gpt-4o-transcribe'}" + "]}) AS result FROM VALUES ('https://example.com/audio.mp3') AS tbl(audio_url);"); + + ASSERT_FALSE(results->HasError()) << "Query failed: " << results->GetError(); + ASSERT_EQ(results->RowCount(), 1); +} + +// Test audio transcription error handling for Ollama +TEST_F(LLMLastTest, LLMLastAudioTranscriptionOllamaError) { + auto con = Config::GetConnection(); + // Mock transcription model to throw error (simulating Ollama behavior) + EXPECT_CALL(*mock_provider, AddTranscriptionRequest(::testing::_)) + .WillOnce(::testing::Throw(std::runtime_error("Audio transcription is not currently supported by Ollama."))); + + // Test with Ollama which doesn't support transcription + const auto results = con.Query( + "SELECT llm_last(" + "{'model_name': 'llama3'}, " + "{'prompt': 'Select the worst audio. Return ID 0.', " + "'context_columns': [" + "{'data': audio_url, " + "'type': 'audio', " + "'transcription_model': 'llama3'}" + "]}) AS result FROM VALUES ('https://example.com/audio.mp3') AS tbl(audio_url);"); + + // Should fail because Ollama doesn't support transcription + ASSERT_TRUE(results->HasError()); +} + }// namespace flock diff --git a/test/unit/functions/aggregate/llm_reduce.cpp b/test/unit/functions/aggregate/llm_reduce.cpp index f7c425a9..b730c3f4 100644 --- a/test/unit/functions/aggregate/llm_reduce.cpp +++ b/test/unit/functions/aggregate/llm_reduce.cpp @@ -151,4 +151,89 @@ TEST_F(LLMReduceTest, Operation_LargeInputSet_ProcessesCorrectly) { } } +// Test llm_reduce with audio transcription +TEST_F(LLMReduceTest, LLMReduceWithAudioTranscription) { + const nlohmann::json expected_transcription = "{\"text\": \"This is a transcribed audio summary\"}"; + const nlohmann::json expected_complete_response = GetExpectedJsonResponse(); + + // Mock transcription model + EXPECT_CALL(*mock_provider, AddTranscriptionRequest(::testing::_)) + 
.Times(1); + EXPECT_CALL(*mock_provider, CollectTranscriptions("multipart/form-data")) + .WillOnce(::testing::Return(std::vector{expected_transcription})); + + // Mock completion model + EXPECT_CALL(*mock_provider, AddCompletionRequest(::testing::_, ::testing::_, ::testing::_, ::testing::_)) + .Times(1); + EXPECT_CALL(*mock_provider, CollectCompletions(::testing::_)) + .WillOnce(::testing::Return(std::vector{expected_complete_response})); + + auto con = Config::GetConnection(); + const auto results = con.Query( + "SELECT llm_reduce(" + "{'model_name': 'gpt-4o'}, " + "{'prompt': 'Summarize the following audio content', " + "'context_columns': [" + "{'data': audio_url, " + "'type': 'audio', " + "'transcription_model': 'gpt-4o-transcribe'}" + "]}) AS result FROM VALUES ('https://example.com/audio.mp3') AS tbl(audio_url);"); + + ASSERT_FALSE(results->HasError()) << "Query failed: " << results->GetError(); + ASSERT_EQ(results->RowCount(), 1); +} + +// Test llm_reduce with audio and text columns +TEST_F(LLMReduceTest, LLMReduceWithAudioAndText) { + const nlohmann::json expected_transcription = "{\"text\": \"Product audio review\"}"; + const nlohmann::json expected_complete_response = GetExpectedJsonResponse(); + + EXPECT_CALL(*mock_provider, AddTranscriptionRequest(::testing::_)) + .Times(1); + EXPECT_CALL(*mock_provider, CollectTranscriptions("multipart/form-data")) + .WillOnce(::testing::Return(std::vector{expected_transcription})); + + EXPECT_CALL(*mock_provider, AddCompletionRequest(::testing::_, ::testing::_, ::testing::_, ::testing::_)) + .Times(1); + EXPECT_CALL(*mock_provider, CollectCompletions(::testing::_)) + .WillOnce(::testing::Return(std::vector{expected_complete_response})); + + auto con = Config::GetConnection(); + const auto results = con.Query( + "SELECT llm_reduce(" + "{'model_name': 'gpt-4o'}, " + "{'prompt': 'Summarize the product reviews', " + "'context_columns': [" + "{'data': text_review, 'name': 'text_review'}, " + "{'data': audio_url, " + 
"'type': 'audio', " + "'transcription_model': 'gpt-4o-transcribe'}" + "]}) AS result FROM VALUES ('Great product', 'https://example.com/audio.mp3') AS tbl(text_review, audio_url);"); + + ASSERT_FALSE(results->HasError()) << "Query failed: " << results->GetError(); + ASSERT_EQ(results->RowCount(), 1); +} + +// Test audio transcription error handling for Ollama +TEST_F(LLMReduceTest, LLMReduceAudioTranscriptionOllamaError) { + auto con = Config::GetConnection(); + // Mock transcription model to throw error (simulating Ollama behavior) + EXPECT_CALL(*mock_provider, AddTranscriptionRequest(::testing::_)) + .WillOnce(::testing::Throw(std::runtime_error("Audio transcription is not currently supported by Ollama."))); + + // Test with Ollama which doesn't support transcription + const auto results = con.Query( + "SELECT llm_reduce(" + "{'model_name': 'llama3'}, " + "{'prompt': 'Summarize this audio', " + "'context_columns': [" + "{'data': audio_url, " + "'type': 'audio', " + "'transcription_model': 'llama3'}" + "]}) AS result FROM VALUES ('https://example.com/audio.mp3') AS tbl(audio_url);"); + + // Should fail because Ollama doesn't support transcription + ASSERT_TRUE(results->HasError()); +} + }// namespace flock diff --git a/test/unit/functions/aggregate/llm_rerank.cpp b/test/unit/functions/aggregate/llm_rerank.cpp index d2ddaf7a..0408efc2 100644 --- a/test/unit/functions/aggregate/llm_rerank.cpp +++ b/test/unit/functions/aggregate/llm_rerank.cpp @@ -179,4 +179,63 @@ TEST_F(LLMRerankTest, Operation_LargeInputSet_ProcessesCorrectly) { ::testing::Mock::AllowLeak(mock_provider.get()); } +// Test llm_rerank with audio transcription +TEST_F(LLMRerankTest, LLMRerankWithAudioTranscription) { + const nlohmann::json expected_transcription1 = "{\"text\": \"First audio candidate\"}"; + const nlohmann::json expected_transcription2 = "{\"text\": \"Second audio candidate\"}"; + const nlohmann::json expected_complete_response = GetExpectedJsonResponse(); + + // Mock transcription 
model (called for each audio file) + EXPECT_CALL(*mock_provider, AddTranscriptionRequest(::testing::_)) + .Times(2); + EXPECT_CALL(*mock_provider, CollectTranscriptions("multipart/form-data")) + .WillOnce(::testing::Return(std::vector{expected_transcription1})) + .WillOnce(::testing::Return(std::vector{expected_transcription2})); + + // Mock completion model + EXPECT_CALL(*mock_provider, AddCompletionRequest(::testing::_, ::testing::_, ::testing::_, ::testing::_)) + .Times(1); + EXPECT_CALL(*mock_provider, CollectCompletions(::testing::_)) + .WillOnce(::testing::Return(std::vector{expected_complete_response})); + + auto con = Config::GetConnection(); + const auto results = con.Query( + "SELECT llm_rerank(" + "{'model_name': 'gpt-4o'}, " + "{'prompt': 'Rank these audio candidates from best to worst', " + "'context_columns': [" + "{'data': 'https://example.com/audio1.mp3', " + "'type': 'audio', " + "'transcription_model': 'gpt-4o-transcribe'}, " + "{'data': 'https://example.com/audio2.mp3', " + "'type': 'audio', " + "'transcription_model': 'gpt-4o-transcribe'}" + "]}) AS result FROM VALUES (1) AS tbl(id);"); + + ASSERT_FALSE(results->HasError()) << "Query failed: " << results->GetError(); + ASSERT_EQ(results->RowCount(), 1); +} + +// Test audio transcription error handling for Ollama +TEST_F(LLMRerankTest, LLMRerankAudioTranscriptionOllamaError) { + auto con = Config::GetConnection(); + // Mock transcription model to throw error (simulating Ollama behavior) + EXPECT_CALL(*mock_provider, AddTranscriptionRequest(::testing::_)) + .WillOnce(::testing::Throw(std::runtime_error("Audio transcription is not currently supported by Ollama."))); + + // Test with Ollama which doesn't support transcription + const auto results = con.Query( + "SELECT llm_rerank(" + "{'model_name': 'llama3'}, " + "{'prompt': 'Rank these audio files', " + "'context_columns': [" + "{'data': audio_url, " + "'type': 'audio', " + "'transcription_model': 'llama3'}" + "]}) AS result FROM VALUES 
('https://example.com/audio.mp3') AS tbl(audio_url);"); + + // Should fail because Ollama doesn't support transcription + ASSERT_TRUE(results->HasError()); +} + }// namespace flock From cd2d86911858b5976880ca49f74651876b5ce0a8 Mon Sep 17 00:00:00 2001 From: Anas Dorbani Date: Wed, 10 Dec 2025 11:40:39 -0500 Subject: [PATCH 30/59] Added unit tests for transcription in model provider adapters --- .../model_manager/model_providers_test.cpp | 63 +++++++++++++++++++ 1 file changed, 63 insertions(+) diff --git a/test/unit/model_manager/model_providers_test.cpp b/test/unit/model_manager/model_providers_test.cpp index a4a377c7..cc55eadb 100644 --- a/test/unit/model_manager/model_providers_test.cpp +++ b/test/unit/model_manager/model_providers_test.cpp @@ -137,6 +137,69 @@ TEST(ModelProvidersTest, OllamaProviderTest) { auto embedding_results = mock_provider.CollectEmbeddings("application/json"); ASSERT_EQ(embedding_results.size(), 1); EXPECT_EQ(embedding_results[0], expected_embedding_response); + + // Set up mock behavior for AddTranscriptionRequest and CollectTranscriptions + const json audio_files = json::array({"https://example.com/audio.mp3"}); + const json expected_transcription_response = {{"text", "This is a test transcription"}}; + + EXPECT_CALL(mock_provider, AddTranscriptionRequest(audio_files)) + .Times(1); + EXPECT_CALL(mock_provider, CollectTranscriptions("multipart/form-data")) + .WillOnce(::testing::Return(std::vector{expected_transcription_response})); + + // Test the mocked transcription methods + mock_provider.AddTranscriptionRequest(audio_files); + auto transcription_results = mock_provider.CollectTranscriptions("multipart/form-data"); + ASSERT_EQ(transcription_results.size(), 1); + EXPECT_EQ(transcription_results[0], expected_transcription_response); +} + +// Test Ollama provider transcription error +TEST(ModelProvidersTest, OllamaProviderTranscriptionError) { + ModelDetails model_details; + model_details.model_name = "test_model"; + model_details.model 
= "llama3"; + model_details.provider_name = "ollama"; + model_details.model_parameters = {{"temperature", 0.7}}; + model_details.secret = {{"api_url", "http://localhost:11434"}}; + + OllamaProvider provider(model_details); + const json audio_files = json::array({"https://example.com/audio.mp3"}); + + // Ollama should throw an error when transcription is requested + EXPECT_THROW(provider.AddTranscriptionRequest(audio_files), std::runtime_error); +} + +// Test transcription with multiple audio files +TEST(ModelProvidersTest, TranscriptionWithMultipleFiles) { + ModelDetails model_details; + model_details.model_name = "test_model"; + model_details.model = "gpt-4o-transcribe"; + model_details.provider_name = "openai"; + model_details.model_parameters = {}; + model_details.secret = {{"api_key", "test_api_key"}}; + + MockProvider mock_provider(model_details); + + const json audio_files = json::array({"https://example.com/audio1.mp3", + "https://example.com/audio2.mp3", + "https://example.com/audio3.mp3"}); + const std::vector expected_transcription_responses = { + {{"text", "First transcription"}}, + {{"text", "Second transcription"}}, + {{"text", "Third transcription"}}}; + + EXPECT_CALL(mock_provider, AddTranscriptionRequest(audio_files)) + .Times(1); + EXPECT_CALL(mock_provider, CollectTranscriptions("multipart/form-data")) + .WillOnce(::testing::Return(expected_transcription_responses)); + + mock_provider.AddTranscriptionRequest(audio_files); + auto transcription_results = mock_provider.CollectTranscriptions("multipart/form-data"); + ASSERT_EQ(transcription_results.size(), 3); + EXPECT_EQ(transcription_results[0], expected_transcription_responses[0]); + EXPECT_EQ(transcription_results[1], expected_transcription_responses[1]); + EXPECT_EQ(transcription_results[2], expected_transcription_responses[2]); } }// namespace flock \ No newline at end of file From 72aa2882cc12f23fa46700e339e3928e13a91838 Mon Sep 17 00:00:00 2001 From: Anas Dorbani Date: Wed, 10 Dec 2025 
11:40:47 -0500 Subject: [PATCH 31/59] Added integration tests for audio transcription in scalar LLM functions --- .../functions/scalar/test_llm_complete.py | 274 ++++++++++++++++++ .../tests/functions/scalar/test_llm_filter.py | 97 +++++++ 2 files changed, 371 insertions(+) diff --git a/test/integration/src/integration/tests/functions/scalar/test_llm_complete.py b/test/integration/src/integration/tests/functions/scalar/test_llm_complete.py index 8afd3df4..420fd0d3 100644 --- a/test/integration/src/integration/tests/functions/scalar/test_llm_complete.py +++ b/test/integration/src/integration/tests/functions/scalar/test_llm_complete.py @@ -662,3 +662,277 @@ def test_llm_complete_image_with_text_context(integration_setup, model_config): assert result.returncode == 0, f"Query failed with error: {result.stderr}" assert "atmosphere_description" in result.stdout.lower() assert len(result.stdout.strip().split("\n")) >= 2 + + +@pytest.fixture(params=[("gpt-4o-mini", "openai"), ("gpt-4o-transcribe", "openai")]) +def transcription_model_config(request): + """Fixture to test with transcription-capable models.""" + return request.param + + +def test_llm_complete_with_audio_transcription( + integration_setup, transcription_model_config +): + """Test llm_complete with audio transcription using OpenAI.""" + duckdb_cli_path, db_path = integration_setup + model_name, provider = transcription_model_config + + # Skip if not OpenAI (only OpenAI supports transcription currently) + if provider != "openai": + pytest.skip("Audio transcription is only supported for OpenAI provider") + + # Create main completion model + test_model_name = f"test-audio-complete_{model_name}" + create_model_query = f"CREATE MODEL('{test_model_name}', 'gpt-4o-mini', 'openai');" + run_cli(duckdb_cli_path, db_path, create_model_query) + + # Create transcription model + transcription_model_name = f"test-transcription-model_{model_name}" + create_transcription_model_query = ( + f"CREATE 
MODEL('{transcription_model_name}', '{model_name}', 'openai');" + ) + run_cli(duckdb_cli_path, db_path, create_transcription_model_query) + + # Use a publicly available test audio file URL + # Note: In real tests, you might want to use a mock server or local file + query = ( + """ + SELECT llm_complete( + {'model_name': '""" + + test_model_name + + """'}, + { + 'prompt': 'Summarize what you hear in this audio clip in one sentence.', + 'context_columns': [ + { + 'data': 'https://download.samplelib.com/mp3/sample-9s.mp3', + 'type': 'audio', + 'transcription_model': '""" + + transcription_model_name + + """' + } + ] + } + ) AS audio_summary; + """ + ) + result = run_cli(duckdb_cli_path, db_path, query) + + # Note: This test may fail if the audio URL is not accessible + # In a real scenario, you'd use a mock server or local test file + if result.returncode != 0: + # If it fails due to network/audio issues, that's acceptable for integration tests + # We're mainly testing that the query structure is correct + assert ( + "transcription" in result.stderr.lower() + or "audio" in result.stderr.lower() + or "error" in result.stderr.lower() + ) + else: + assert "audio_summary" in result.stdout.lower() + + +def test_llm_complete_with_audio_and_text( + integration_setup, transcription_model_config +): + """Test llm_complete with both audio and text context columns.""" + duckdb_cli_path, db_path = integration_setup + model_name, provider = transcription_model_config + + if provider != "openai": + pytest.skip("Audio transcription is only supported for OpenAI provider") + + test_model_name = f"test-audio-text_{model_name}" + create_model_query = f"CREATE MODEL('{test_model_name}', 'gpt-4o-mini', 'openai');" + run_cli(duckdb_cli_path, db_path, create_model_query) + + transcription_model_name = f"test-transcription_{model_name}" + create_transcription_model_query = ( + f"CREATE MODEL('{transcription_model_name}', '{model_name}', 'openai');" + ) + run_cli(duckdb_cli_path, db_path, 
create_transcription_model_query) + + query = ( + """ + SELECT llm_complete( + {'model_name': '""" + + test_model_name + + """'}, + { + 'prompt': 'Based on the product name {product} and the audio description, write a marketing description.', + 'context_columns': [ + {'data': 'Wireless Headphones', 'name': 'product'}, + { + 'data': 'https://download.samplelib.com/mp3/sample-9s.mp3', + 'type': 'audio', + 'transcription_model': '""" + + transcription_model_name + + """' + } + ] + } + ) AS marketing_copy; + """ + ) + result = run_cli(duckdb_cli_path, db_path, query) + + if result.returncode != 0: + # Acceptable if network/audio issues occur + assert ( + "transcription" in result.stderr.lower() + or "audio" in result.stderr.lower() + or "error" in result.stderr.lower() + ) + else: + assert "marketing_copy" in result.stdout.lower() + + +def test_llm_complete_audio_missing_transcription_model(integration_setup): + """Test that audio type requires transcription_model.""" + duckdb_cli_path, db_path = integration_setup + + test_model_name = "test-audio-error" + create_model_query = f"CREATE MODEL('{test_model_name}', 'gpt-4o-mini', 'openai');" + run_cli(duckdb_cli_path, db_path, create_model_query) + + query = ( + """ + SELECT llm_complete( + {'model_name': '""" + + test_model_name + + """'}, + { + 'prompt': 'Summarize this audio', + 'context_columns': [ + { + 'data': 'https://example.com/audio.mp3', + 'type': 'audio' + } + ] + } + ) AS result; + """ + ) + result = run_cli(duckdb_cli_path, db_path, query) + + # Should fail because transcription_model is required for audio type + assert result.returncode != 0 + assert ( + "transcription_model" in result.stderr.lower() + or "required" in result.stderr.lower() + ) + + +def test_llm_complete_audio_ollama_error(integration_setup): + """Test that Ollama provider throws error for audio transcription.""" + duckdb_cli_path, db_path = integration_setup + + create_model_query = "CREATE MODEL('test-ollama-audio', 'llama3.2', 
'ollama');" + run_cli(duckdb_cli_path, db_path, create_model_query) + + create_transcription_model_query = ( + "CREATE MODEL('test-ollama-transcription', 'llama3.2', 'ollama');" + ) + run_cli(duckdb_cli_path, db_path, create_transcription_model_query) + + query = """ + SELECT llm_complete( + {'model_name': 'test-ollama-audio'}, + { + 'prompt': 'Summarize this audio', + 'context_columns': [ + { + 'data': 'https://example.com/audio.mp3', + 'type': 'audio', + 'transcription_model': 'test-ollama-transcription' + } + ] + } + ) AS result; + """ + result = run_cli(duckdb_cli_path, db_path, query) + + # Should fail because Ollama doesn't support transcription + assert result.returncode != 0 + assert ( + "ollama" in result.stderr.lower() + or "transcription" in result.stderr.lower() + or "not supported" in result.stderr.lower() + ) + + +def test_llm_complete_audio_batch_processing( + integration_setup, transcription_model_config +): + """Test batch processing with multiple audio files.""" + duckdb_cli_path, db_path = integration_setup + model_name, provider = transcription_model_config + + if provider != "openai": + pytest.skip("Audio transcription is only supported for OpenAI provider") + + test_model_name = f"test-audio-batch_{model_name}" + create_model_query = f"CREATE MODEL('{test_model_name}', 'gpt-4o-mini', 'openai');" + run_cli(duckdb_cli_path, db_path, create_model_query) + + transcription_model_name = f"test-transcription-batch_{model_name}" + create_transcription_model_query = ( + f"CREATE MODEL('{transcription_model_name}', '{model_name}', 'openai');" + ) + run_cli(duckdb_cli_path, db_path, create_transcription_model_query) + + create_table_query = """ + CREATE OR REPLACE TABLE audio_clips ( + id INTEGER, + audio_url VARCHAR, + product_name VARCHAR + ); + """ + run_cli(duckdb_cli_path, db_path, create_table_query) + + insert_data_query = """ + INSERT INTO audio_clips + VALUES + (1, 'https://download.samplelib.com/mp3/sample-9s.mp3', 'Headphones'), + (2, 
'https://download.samplelib.com/mp3/sample-9s.mp3', 'Speaker'), + (3, 'https://download.samplelib.com/mp3/sample-9s.mp3', 'Microphone'); + """ + run_cli(duckdb_cli_path, db_path, insert_data_query) + + query = ( + """ + SELECT product_name, + llm_complete( + {'model_name': '""" + + test_model_name + + """'}, + { + 'prompt': 'Based on the product {product} and its audio, write a short description.', + 'context_columns': [ + {'data': product_name, 'name': 'product'}, + { + 'data': audio_url, + 'type': 'audio', + 'transcription_model': '""" + + transcription_model_name + + """' + } + ] + } + ) AS description + FROM audio_clips + WHERE id <= 2; + """ + ) + result = run_cli(duckdb_cli_path, db_path, query) + + if result.returncode != 0: + # Acceptable if network/audio issues occur + assert ( + "transcription" in result.stderr.lower() + or "audio" in result.stderr.lower() + or "error" in result.stderr.lower() + ) + else: + lines = result.stdout.strip().split("\n") + assert len(lines) >= 3 # Header + at least 2 data rows diff --git a/test/integration/src/integration/tests/functions/scalar/test_llm_filter.py b/test/integration/src/integration/tests/functions/scalar/test_llm_filter.py index 93b6930c..42f30353 100644 --- a/test/integration/src/integration/tests/functions/scalar/test_llm_filter.py +++ b/test/integration/src/integration/tests/functions/scalar/test_llm_filter.py @@ -618,3 +618,100 @@ def test_llm_filter_image_with_text_context(integration_setup, model_config): assert result.returncode == 0, f"Query failed with error: {result.stderr}" assert "is_appropriate" in result.stdout.lower() assert len(result.stdout.strip().split("\n")) >= 2 + + +@pytest.fixture(params=[("gpt-4o-mini", "openai"), ("gpt-4o-transcribe", "openai")]) +def transcription_model_config(request): + """Fixture to test with transcription-capable models (OpenAI/Azure only).""" + return request.param + + +def test_llm_filter_with_audio_transcription(integration_setup, transcription_model_config): + 
"""Test llm_filter with audio transcription using OpenAI.""" + duckdb_cli_path, db_path = integration_setup + model_name, provider = transcription_model_config + + if provider != "openai": + pytest.skip("Audio transcription is only supported for OpenAI provider") + + test_model_name = f"test-audio-filter_{model_name}" + create_model_query = f"CREATE MODEL('{test_model_name}', 'gpt-4o-mini', 'openai');" + run_cli(duckdb_cli_path, db_path, create_model_query) + + transcription_model_name = f"test-transcription-filter_{model_name}" + create_transcription_model_query = ( + f"CREATE MODEL('{transcription_model_name}', '{model_name}', 'openai');" + ) + run_cli(duckdb_cli_path, db_path, create_transcription_model_query) + + query = ( + """ + SELECT llm_filter( + {'model_name': '""" + + test_model_name + + """'}, + { + 'prompt': 'Does this audio contain positive sentiment? Answer true or false.', + 'context_columns': [ + { + 'data': 'https://download.samplelib.com/mp3/sample-9s.mp3', + 'type': 'audio', + 'transcription_model': '""" + + transcription_model_name + + """' + } + ] + } + ) AS is_positive; + """ + ) + result = run_cli(duckdb_cli_path, db_path, query) + + if result.returncode != 0: + assert ( + "transcription" in result.stderr.lower() + or "audio" in result.stderr.lower() + or "error" in result.stderr.lower() + ) + else: + assert "is_positive" in result.stdout.lower() + assert "true" in result.stdout.lower() or "false" in result.stdout.lower() + + +def test_llm_filter_audio_ollama_error(integration_setup): + """Test that Ollama provider throws error for audio transcription in llm_filter.""" + duckdb_cli_path, db_path = integration_setup + + test_model_name = "test-ollama-filter-audio" + create_model_query = "CREATE MODEL('test-ollama-filter-audio', 'llama3.2', 'ollama');" + run_cli(duckdb_cli_path, db_path, create_model_query) + + transcription_model_name = "test-ollama-filter-transcription" + create_transcription_model_query = ( + "CREATE 
MODEL('test-ollama-filter-transcription', 'llama3.2', 'ollama');" + ) + run_cli(duckdb_cli_path, db_path, create_transcription_model_query) + + query = """ + SELECT llm_filter( + {'model_name': 'test-ollama-filter-audio'}, + { + 'prompt': 'Is the sentiment positive?', + 'context_columns': [ + { + 'data': 'https://example.com/audio.mp3', + 'type': 'audio', + 'transcription_model': 'test-ollama-filter-transcription' + } + ] + } + ) AS result; + """ + result = run_cli(duckdb_cli_path, db_path, query) + + assert result.returncode != 0 + assert ( + "ollama" in result.stderr.lower() + or "transcription" in result.stderr.lower() + or "not supported" in result.stderr.lower() + ) From fc1d29789af7e95704b7a42a583047d28f817c01 Mon Sep 17 00:00:00 2001 From: Anas Dorbani Date: Wed, 10 Dec 2025 11:40:59 -0500 Subject: [PATCH 32/59] Added integration tests for audio transcription in aggregate LLM functions --- .../functions/aggregate/test_llm_first.py | 128 +++++++++++++++++ .../functions/aggregate/test_llm_last.py | 128 +++++++++++++++++ .../functions/aggregate/test_llm_reduce.py | 128 +++++++++++++++++ .../functions/aggregate/test_llm_rerank.py | 131 ++++++++++++++++++ 4 files changed, 515 insertions(+) diff --git a/test/integration/src/integration/tests/functions/aggregate/test_llm_first.py b/test/integration/src/integration/tests/functions/aggregate/test_llm_first.py index 6e03b5ee..3143f9dc 100644 --- a/test/integration/src/integration/tests/functions/aggregate/test_llm_first.py +++ b/test/integration/src/integration/tests/functions/aggregate/test_llm_first.py @@ -731,3 +731,131 @@ def test_llm_first_image_batch_processing(integration_setup, model_config): f"Expected at least 4 lines (header + 3 cities), got {len(lines)}" ) assert "tallest_building" in result.stdout.lower() + + +@pytest.fixture(params=[("gpt-4o-mini", "openai"), ("gpt-4o-transcribe", "openai")]) +def transcription_model_config(request): + """Fixture to test with transcription-capable models (OpenAI/Azure 
only).""" + return request.param + + +def test_llm_first_with_audio_transcription(integration_setup, transcription_model_config): + """Test llm_first with audio transcription using OpenAI.""" + duckdb_cli_path, db_path = integration_setup + model_name, provider = transcription_model_config + + if provider != "openai": + pytest.skip("Audio transcription is only supported for OpenAI provider") + + test_model_name = f"test-audio-first_{model_name}" + create_model_query = f"CREATE MODEL('{test_model_name}', 'gpt-4o-mini', 'openai');" + run_cli(duckdb_cli_path, db_path, create_model_query) + + transcription_model_name = f"test-transcription-first_{model_name}" + create_transcription_model_query = ( + f"CREATE MODEL('{transcription_model_name}', '{model_name}', 'openai');" + ) + run_cli(duckdb_cli_path, db_path, create_transcription_model_query) + + create_table_query = """ + CREATE OR REPLACE TABLE audio_candidates ( + id INTEGER, + audio_url VARCHAR, + name VARCHAR + ); + """ + run_cli(duckdb_cli_path, db_path, create_table_query) + + insert_data_query = """ + INSERT INTO audio_candidates + VALUES + (1, 'https://download.samplelib.com/mp3/sample-9s.mp3', 'Alice'), + (2, 'https://download.samplelib.com/mp3/sample-9s.mp3', 'Bob'); + """ + run_cli(duckdb_cli_path, db_path, insert_data_query) + + query = ( + """ + SELECT llm_first( + {'model_name': '""" + + test_model_name + + """'}, + { + 'prompt': 'Which candidate has the best audio interview? 
Return the ID number only.', + 'context_columns': [ + { + 'data': audio_url, + 'type': 'audio', + 'transcription_model': '""" + + transcription_model_name + + """' + } + ] + } + ) AS selected_candidate + FROM audio_candidates; + """ + ) + result = run_cli(duckdb_cli_path, db_path, query) + + if result.returncode != 0: + assert ( + "transcription" in result.stderr.lower() + or "audio" in result.stderr.lower() + or "error" in result.stderr.lower() + ) + else: + assert "selected_candidate" in result.stdout.lower() + + +def test_llm_first_audio_ollama_error(integration_setup): + """Test that Ollama provider throws error for audio transcription in llm_first.""" + duckdb_cli_path, db_path = integration_setup + + test_model_name = "test-ollama-first-audio" + create_model_query = "CREATE MODEL('test-ollama-first-audio', 'llama3.2', 'ollama');" + run_cli(duckdb_cli_path, db_path, create_model_query) + + transcription_model_name = "test-ollama-first-transcription" + create_transcription_model_query = ( + "CREATE MODEL('test-ollama-first-transcription', 'llama3.2', 'ollama');" + ) + run_cli(duckdb_cli_path, db_path, create_transcription_model_query) + + create_table_query = """ + CREATE OR REPLACE TABLE test_audio ( + id INTEGER, + audio_url VARCHAR + ); + """ + run_cli(duckdb_cli_path, db_path, create_table_query) + + insert_data_query = """ + INSERT INTO test_audio VALUES (1, 'https://example.com/audio.mp3'); + """ + run_cli(duckdb_cli_path, db_path, insert_data_query) + + query = """ + SELECT llm_first( + {'model_name': 'test-ollama-first-audio'}, + { + 'prompt': 'Select the best audio. 
Return ID only.', + 'context_columns': [ + { + 'data': audio_url, + 'type': 'audio', + 'transcription_model': 'test-ollama-first-transcription' + } + ] + } + ) AS result + FROM test_audio; + """ + result = run_cli(duckdb_cli_path, db_path, query) + + assert result.returncode != 0 + assert ( + "ollama" in result.stderr.lower() + or "transcription" in result.stderr.lower() + or "not supported" in result.stderr.lower() + ) diff --git a/test/integration/src/integration/tests/functions/aggregate/test_llm_last.py b/test/integration/src/integration/tests/functions/aggregate/test_llm_last.py index bb0892ab..0ce9a4a3 100644 --- a/test/integration/src/integration/tests/functions/aggregate/test_llm_last.py +++ b/test/integration/src/integration/tests/functions/aggregate/test_llm_last.py @@ -713,3 +713,131 @@ def test_llm_last_image_batch_processing(integration_setup, model_config): f"Expected at least 4 lines (header + 3 cuisines), got {len(lines)}" ) assert "lowest_rated_restaurant" in result.stdout.lower() + + +@pytest.fixture(params=[("gpt-4o-mini", "openai"), ("gpt-4o-transcribe", "openai")]) +def transcription_model_config(request): + """Fixture to test with transcription-capable models (OpenAI/Azure only).""" + return request.param + + +def test_llm_last_with_audio_transcription(integration_setup, transcription_model_config): + """Test llm_last with audio transcription using OpenAI.""" + duckdb_cli_path, db_path = integration_setup + model_name, provider = transcription_model_config + + if provider != "openai": + pytest.skip("Audio transcription is only supported for OpenAI provider") + + test_model_name = f"test-audio-last_{model_name}" + create_model_query = f"CREATE MODEL('{test_model_name}', 'gpt-4o-mini', 'openai');" + run_cli(duckdb_cli_path, db_path, create_model_query) + + transcription_model_name = f"test-transcription-last_{model_name}" + create_transcription_model_query = ( + f"CREATE MODEL('{transcription_model_name}', '{model_name}', 'openai');" + ) + 
run_cli(duckdb_cli_path, db_path, create_transcription_model_query) + + create_table_query = """ + CREATE OR REPLACE TABLE audio_candidates ( + id INTEGER, + audio_url VARCHAR, + name VARCHAR + ); + """ + run_cli(duckdb_cli_path, db_path, create_table_query) + + insert_data_query = """ + INSERT INTO audio_candidates + VALUES + (1, 'https://download.samplelib.com/mp3/sample-9s.mp3', 'Alice'), + (2, 'https://download.samplelib.com/mp3/sample-9s.mp3', 'Bob'); + """ + run_cli(duckdb_cli_path, db_path, insert_data_query) + + query = ( + """ + SELECT llm_last( + {'model_name': '""" + + test_model_name + + """'}, + { + 'prompt': 'Which candidate has the worst audio interview? Return the ID number only.', + 'context_columns': [ + { + 'data': audio_url, + 'type': 'audio', + 'transcription_model': '""" + + transcription_model_name + + """' + } + ] + } + ) AS selected_candidate + FROM audio_candidates; + """ + ) + result = run_cli(duckdb_cli_path, db_path, query) + + if result.returncode != 0: + assert ( + "transcription" in result.stderr.lower() + or "audio" in result.stderr.lower() + or "error" in result.stderr.lower() + ) + else: + assert "selected_candidate" in result.stdout.lower() + + +def test_llm_last_audio_ollama_error(integration_setup): + """Test that Ollama provider throws error for audio transcription in llm_last.""" + duckdb_cli_path, db_path = integration_setup + + test_model_name = "test-ollama-last-audio" + create_model_query = "CREATE MODEL('test-ollama-last-audio', 'llama3.2', 'ollama');" + run_cli(duckdb_cli_path, db_path, create_model_query) + + transcription_model_name = "test-ollama-last-transcription" + create_transcription_model_query = ( + "CREATE MODEL('test-ollama-last-transcription', 'llama3.2', 'ollama');" + ) + run_cli(duckdb_cli_path, db_path, create_transcription_model_query) + + create_table_query = """ + CREATE OR REPLACE TABLE test_audio ( + id INTEGER, + audio_url VARCHAR + ); + """ + run_cli(duckdb_cli_path, db_path, create_table_query) + 
+ insert_data_query = """ + INSERT INTO test_audio VALUES (1, 'https://example.com/audio.mp3'); + """ + run_cli(duckdb_cli_path, db_path, insert_data_query) + + query = """ + SELECT llm_last( + {'model_name': 'test-ollama-last-audio'}, + { + 'prompt': 'Select the worst audio. Return ID only.', + 'context_columns': [ + { + 'data': audio_url, + 'type': 'audio', + 'transcription_model': 'test-ollama-last-transcription' + } + ] + } + ) AS result + FROM test_audio; + """ + result = run_cli(duckdb_cli_path, db_path, query) + + assert result.returncode != 0 + assert ( + "ollama" in result.stderr.lower() + or "transcription" in result.stderr.lower() + or "not supported" in result.stderr.lower() + ) diff --git a/test/integration/src/integration/tests/functions/aggregate/test_llm_reduce.py b/test/integration/src/integration/tests/functions/aggregate/test_llm_reduce.py index 435d34cf..0dd670c6 100644 --- a/test/integration/src/integration/tests/functions/aggregate/test_llm_reduce.py +++ b/test/integration/src/integration/tests/functions/aggregate/test_llm_reduce.py @@ -785,3 +785,131 @@ def test_llm_reduce_image_batch_processing(integration_setup, model_config): assert result.returncode == 0, f"Query failed with error: {result.stderr}" assert "landscape_summary" in result.stdout.lower() assert len(result.stdout.strip().split("\n")) >= 2 + + +@pytest.fixture(params=[("gpt-4o-mini", "openai"), ("gpt-4o-transcribe", "openai")]) +def transcription_model_config(request): + """Fixture to test with transcription-capable models (OpenAI/Azure only).""" + return request.param + + +def test_llm_reduce_with_audio_transcription(integration_setup, transcription_model_config): + """Test llm_reduce with audio transcription using OpenAI.""" + duckdb_cli_path, db_path = integration_setup + model_name, provider = transcription_model_config + + if provider != "openai": + pytest.skip("Audio transcription is only supported for OpenAI provider") + + test_model_name = 
f"test-audio-reduce_{model_name}" + create_model_query = f"CREATE MODEL('{test_model_name}', 'gpt-4o-mini', 'openai');" + run_cli(duckdb_cli_path, db_path, create_model_query) + + transcription_model_name = f"test-transcription-reduce_{model_name}" + create_transcription_model_query = ( + f"CREATE MODEL('{transcription_model_name}', '{model_name}', 'openai');" + ) + run_cli(duckdb_cli_path, db_path, create_transcription_model_query) + + create_table_query = """ + CREATE OR REPLACE TABLE audio_reviews ( + id INTEGER, + audio_url VARCHAR, + product_name VARCHAR + ); + """ + run_cli(duckdb_cli_path, db_path, create_table_query) + + insert_data_query = """ + INSERT INTO audio_reviews + VALUES + (1, 'https://download.samplelib.com/mp3/sample-9s.mp3', 'Headphones'), + (2, 'https://download.samplelib.com/mp3/sample-9s.mp3', 'Speaker'); + """ + run_cli(duckdb_cli_path, db_path, insert_data_query) + + query = ( + """ + SELECT llm_reduce( + {'model_name': '""" + + test_model_name + + """'}, + { + 'prompt': 'Summarize the key points from these audio reviews', + 'context_columns': [ + { + 'data': audio_url, + 'type': 'audio', + 'transcription_model': '""" + + transcription_model_name + + """' + } + ] + } + ) AS audio_summary + FROM audio_reviews; + """ + ) + result = run_cli(duckdb_cli_path, db_path, query) + + if result.returncode != 0: + assert ( + "transcription" in result.stderr.lower() + or "audio" in result.stderr.lower() + or "error" in result.stderr.lower() + ) + else: + assert "audio_summary" in result.stdout.lower() + + +def test_llm_reduce_audio_ollama_error(integration_setup): + """Test that Ollama provider throws error for audio transcription in llm_reduce.""" + duckdb_cli_path, db_path = integration_setup + + test_model_name = "test-ollama-reduce-audio" + create_model_query = "CREATE MODEL('test-ollama-reduce-audio', 'llama3.2', 'ollama');" + run_cli(duckdb_cli_path, db_path, create_model_query) + + transcription_model_name = "test-ollama-reduce-transcription" + 
create_transcription_model_query = ( + "CREATE MODEL('test-ollama-reduce-transcription', 'llama3.2', 'ollama');" + ) + run_cli(duckdb_cli_path, db_path, create_transcription_model_query) + + create_table_query = """ + CREATE OR REPLACE TABLE test_audio ( + id INTEGER, + audio_url VARCHAR + ); + """ + run_cli(duckdb_cli_path, db_path, create_table_query) + + insert_data_query = """ + INSERT INTO test_audio VALUES (1, 'https://example.com/audio.mp3'); + """ + run_cli(duckdb_cli_path, db_path, insert_data_query) + + query = """ + SELECT llm_reduce( + {'model_name': 'test-ollama-reduce-audio'}, + { + 'prompt': 'Summarize this audio', + 'context_columns': [ + { + 'data': audio_url, + 'type': 'audio', + 'transcription_model': 'test-ollama-reduce-transcription' + } + ] + } + ) AS result + FROM test_audio; + """ + result = run_cli(duckdb_cli_path, db_path, query) + + assert result.returncode != 0 + assert ( + "ollama" in result.stderr.lower() + or "transcription" in result.stderr.lower() + or "not supported" in result.stderr.lower() + ) diff --git a/test/integration/src/integration/tests/functions/aggregate/test_llm_rerank.py b/test/integration/src/integration/tests/functions/aggregate/test_llm_rerank.py index a60d470c..537837b2 100644 --- a/test/integration/src/integration/tests/functions/aggregate/test_llm_rerank.py +++ b/test/integration/src/integration/tests/functions/aggregate/test_llm_rerank.py @@ -744,3 +744,134 @@ def test_llm_rerank_image_batch_processing(integration_setup, model_config): f"Expected at least 4 lines (header + 3 countries), got {len(lines)}" ) assert "ranked_destinations" in result.stdout.lower() + + +@pytest.fixture(params=[("gpt-4o-mini", "openai"), ("gpt-4o-transcribe", "openai")]) +def transcription_model_config(request): + """Fixture to test with transcription-capable models (OpenAI/Azure only).""" + return request.param + + +def test_llm_rerank_with_audio_transcription(integration_setup, transcription_model_config): + """Test llm_rerank with 
audio transcription using OpenAI.""" + duckdb_cli_path, db_path = integration_setup + model_name, provider = transcription_model_config + + if provider != "openai": + pytest.skip("Audio transcription is only supported for OpenAI provider") + + test_model_name = f"test-audio-rerank_{model_name}" + create_model_query = f"CREATE MODEL('{test_model_name}', 'gpt-4o-mini', 'openai');" + run_cli(duckdb_cli_path, db_path, create_model_query) + + transcription_model_name = f"test-transcription-rerank_{model_name}" + create_transcription_model_query = ( + f"CREATE MODEL('{transcription_model_name}', '{model_name}', 'openai');" + ) + run_cli(duckdb_cli_path, db_path, create_transcription_model_query) + + create_table_query = """ + CREATE OR REPLACE TABLE audio_candidates ( + id INTEGER, + audio_url VARCHAR, + name VARCHAR + ); + """ + run_cli(duckdb_cli_path, db_path, create_table_query) + + insert_data_query = """ + INSERT INTO audio_candidates + VALUES + (1, 'https://download.samplelib.com/mp3/sample-9s.mp3', 'Alice'), + (2, 'https://download.samplelib.com/mp3/sample-9s.mp3', 'Bob'), + (3, 'https://download.samplelib.com/mp3/sample-9s.mp3', 'Carol'); + """ + run_cli(duckdb_cli_path, db_path, insert_data_query) + + query = ( + """ + SELECT llm_rerank( + {'model_name': '""" + + test_model_name + + """'}, + { + 'prompt': 'Rank these audio interviews from best to worst', + 'context_columns': [ + { + 'data': audio_url, + 'type': 'audio', + 'transcription_model': '""" + + transcription_model_name + + """' + } + ] + } + ) AS ranked_candidates + FROM audio_candidates; + """ + ) + result = run_cli(duckdb_cli_path, db_path, query) + + if result.returncode != 0: + assert ( + "transcription" in result.stderr.lower() + or "audio" in result.stderr.lower() + or "error" in result.stderr.lower() + ) + else: + assert "ranked_candidates" in result.stdout.lower() + + +def test_llm_rerank_audio_ollama_error(integration_setup): + """Test that Ollama provider throws error for audio transcription 
in llm_rerank.""" + duckdb_cli_path, db_path = integration_setup + + test_model_name = "test-ollama-rerank-audio" + create_model_query = "CREATE MODEL('test-ollama-rerank-audio', 'llama3.2', 'ollama');" + run_cli(duckdb_cli_path, db_path, create_model_query) + + transcription_model_name = "test-ollama-rerank-transcription" + create_transcription_model_query = ( + "CREATE MODEL('test-ollama-rerank-transcription', 'llama3.2', 'ollama');" + ) + run_cli(duckdb_cli_path, db_path, create_transcription_model_query) + + create_table_query = """ + CREATE OR REPLACE TABLE test_audio ( + id INTEGER, + audio_url VARCHAR + ); + """ + run_cli(duckdb_cli_path, db_path, create_table_query) + + insert_data_query = """ + INSERT INTO test_audio VALUES + (1, 'https://example.com/audio1.mp3'), + (2, 'https://example.com/audio2.mp3'); + """ + run_cli(duckdb_cli_path, db_path, insert_data_query) + + query = """ + SELECT llm_rerank( + {'model_name': 'test-ollama-rerank-audio'}, + { + 'prompt': 'Rank these audio files', + 'context_columns': [ + { + 'data': audio_url, + 'type': 'audio', + 'transcription_model': 'test-ollama-rerank-transcription' + } + ] + } + ) AS result + FROM test_audio; + """ + result = run_cli(duckdb_cli_path, db_path, query) + + assert result.returncode != 0 + assert ( + "ollama" in result.stderr.lower() + or "transcription" in result.stderr.lower() + or "not supported" in result.stderr.lower() + ) From 893be8688ee732c8dc7355c488ebbd4adf875cc9 Mon Sep 17 00:00:00 2001 From: Anas Dorbani Date: Wed, 10 Dec 2025 11:41:45 -0500 Subject: [PATCH 33/59] Updated unit test database with audio transcription test data --- test/unit/unit_test.db | Bin 3944448 -> 4206592 bytes 1 file changed, 0 insertions(+), 0 deletions(-) diff --git a/test/unit/unit_test.db b/test/unit/unit_test.db index 770a7caf886f2bc5442925ef04023f5170ed5393..e3194bcb637c89601e639a3bc97183ba099575a4 100644 GIT binary patch delta 1824 zcmbVMU2IfE6rMA8f4kf6mi?pMmTsZ67HxWfmM)TDffyfnU`Y%Z6X~vUDK#w==mR0d 
z7NQ6FOSayG;lXHYqHSt)t82VGXj-BKs}T!67>I^_P=gvn)fk%?SZD5TOS8ThPjcqY z`R4r3xigfTgQ46IAMv|yfBfN|qnj@N$}0&)m_IsXR^~%yCK`P zHehr9Se4K3V|Jtd@@}s$pYLYi+|OX%W1RRT;`ixn-eZB91Sg%wy^a;xn^&HMvartH zawtoOG1#%)IC(W^kLv8E*T}@r*jDRor?GkCMLrtJre|6mSx4vh&wF7n({#r7vAxV= z92%?f_RTO?cmF_3+rgFr0ku4+Nev@0(d=E+(^>q#tM(fwIzys54owS=%9l7wgE}^5 z#}`f;jp;2d72&)joyp|vIkGYqov|Tg(<1oUdTXbB=!79*1`#E?=eumFMp#7pOVlNvil2d1=BPGDSQsB4c z;-_9nI+;ZgBQ+5=_w+#-7oTO|5%YKk-YyrrUxyp!To`g-_G56?Wh666k*kHM=o^Dd zF`j`6bK4AjtC>48FtSoLVDdq$(w3@u>`yrFgof<(OI_MRQ&A*>Fs&*Pw}3}GLOL6% zTbi;QuV{&U8cKk_L9}pQ9hbVddCA$VRswu#D*-;Tn=f#FqRec)3pWN;J}ApWA@8fW z$il5Du*9U&NQ%~G{;K(_k0-rhIqomXf8#;lC2+?hqR04oSD~w{(VVN{OYOaR$B{Y$V5@R z8|7(n%ixuP-&ZWnv_WTI*O~VXKI$;Pd5ur0!HV0|=!w}4JZ645%0G8OvjK_Jm<*>` zC!0Iu$o6Zef_p#oQk57p^YuZ}AoT#pTyi z+}q?=$%-_|#Ytscluz*}*u<7D9ub{WyrIf%DKVx@l&T`NuMneMJnCZieSi4o^Z8RL z44Nqn?N1Wg2nB6=4J6YQjdsCc=jZn+ewtK0MfYEi_dB;=~(GGP@4RVaaO0 z@CqJI9XRsb;htypBGA`!pr>VE5w&#xkDXV>s9`L`9CD+CgZ=t}R8L_&)r%;3t>krP zFRt&+6C&YS!gYk}3EK$U2{#aS5XK2N57?QOVjp(ce&a;vM6xUOlWMmmv-hTG-rB@uMnyG`ptR{A@Pa=GR1ICu*{r#HbOq4~1kAu~^hBHrqm}Xdf(v zVzc-#tp)X_7d(PulLw(Mx(&DKq4gnKi=Y&3>5KYgB)+s#8q_yyvFf5Dj&I{l!Ffw4**WAB*F_4tH1d*G}v7RC?L&ow(|jSN4KCGt_9h zgWm?QuBl*@IyN>A7^X9q*#lwcTBg$ezJP(F_t8&p!*_MWk44N%rsI$Gf4~wf9{> zl}m!HqbqpHQU~W?+6y>0&ZYFfGw?=4B|ij5|9Kcnpo(W8W10mKcI37r*-KMTgU;=Q zRq)c^w!BPr;!Q|}3I@y?9@0H>I}Ca~E*Q5wH34exS-9z;2Wr)75)vw8L+2)3h>uqo zo$^#!%L$?=YIC5=tsupwOR&%-j!RIZAhdVf1!Kw-!!X)9Ge(mY$u zr0{3xn~!yRT;$Q@y=aAS6RbD7FzN~1;}yZ=6WJ+lW4-0ZSE*30p&hZe`D&H9BAykg0v zwxIs$b6oLXrn)hLDc$rOuGL9o68xw2R0dP(SPmDQ@``nF8Ye+7q+x%J7A56?D3+eI zNl(0!2qgIbohSPH3dWapTT6@MgYY)TKVc28;a%^4X+gFAIz|UVMrA~*YkhS^S!=i0 zrh0boZpjSEtkb8)!^i%X#w{QdF%2<2#`HMT6HLQQBTN~lEYm2{ H7}NNFfNP?^ From 64058f309c1a47f6eadf8a3a9b694b16c80cee92 Mon Sep 17 00:00:00 2001 From: Anas Dorbani Date: Wed, 10 Dec 2025 11:55:50 -0500 Subject: [PATCH 34/59] Added unit tests for TranscribeAudioColumn and made it public for testing --- .../flock/prompt_manager/prompt_manager.hpp | 1 - 
.../prompt_manager/prompt_manager_test.cpp | 178 ++++++++++++++++++ 2 files changed, 178 insertions(+), 1 deletion(-) diff --git a/src/include/flock/prompt_manager/prompt_manager.hpp b/src/include/flock/prompt_manager/prompt_manager.hpp index 9f46b6b6..9ef3cd9a 100644 --- a/src/include/flock/prompt_manager/prompt_manager.hpp +++ b/src/include/flock/prompt_manager/prompt_manager.hpp @@ -46,7 +46,6 @@ class PromptManager { static std::string ConstructInputTuples(const nlohmann::json& columns, const std::string& tuple_format = "XML"); -private: // Helper function to transcribe audio column and create transcription text column static nlohmann::json TranscribeAudioColumn(const nlohmann::json& audio_column); diff --git a/test/unit/prompt_manager/prompt_manager_test.cpp b/test/unit/prompt_manager/prompt_manager_test.cpp index b2154ecf..9aff5918 100644 --- a/test/unit/prompt_manager/prompt_manager_test.cpp +++ b/test/unit/prompt_manager/prompt_manager_test.cpp @@ -1,6 +1,11 @@ +#include "../functions/mock_provider.hpp" +#include "flock/core/config.hpp" +#include "flock/model_manager/model.hpp" #include "flock/prompt_manager/prompt_manager.hpp" #include "nlohmann/json.hpp" +#include #include +#include #include namespace flock { @@ -247,4 +252,177 @@ TEST(PromptManager, CreatePromptDetailsOnlyPromptName) { EXPECT_EQ(version, 6); } +// Test fixture for TranscribeAudioColumn tests +class TranscribeAudioColumnTest : public ::testing::Test { +protected: + void SetUp() override { + auto con = Config::GetConnection(); + con.Query(" CREATE SECRET (" + " TYPE OPENAI," + " API_KEY 'your-api-key');"); + con.Query(" CREATE SECRET (" + " TYPE OLLAMA," + " API_URL '127.0.0.1:11434');"); + + mock_provider = std::make_shared(ModelDetails{}); + Model::SetMockProvider(mock_provider); + } + + void TearDown() override { + Model::ResetMockProvider(); + mock_provider = nullptr; + } + + std::shared_ptr mock_provider; +}; + +// Test TranscribeAudioColumn with named column 
+TEST_F(TranscribeAudioColumnTest, TranscribeAudioColumnWithName) { + json audio_column = { + {"name", "audio_review"}, + {"type", "audio"}, + {"transcription_model", "gpt-4o-transcribe"}, + {"data", {"https://example.com/audio1.mp3", "https://example.com/audio2.mp3"}}}; + + json expected_transcription1 = "{\"text\": \"This is the first transcription\"}"; + json expected_transcription2 = "{\"text\": \"This is the second transcription\"}"; + + EXPECT_CALL(*mock_provider, AddTranscriptionRequest(::testing::_)) + .Times(1); + EXPECT_CALL(*mock_provider, CollectTranscriptions("multipart/form-data")) + .WillOnce(::testing::Return(std::vector{expected_transcription1, expected_transcription2})); + + auto result = PromptManager::TranscribeAudioColumn(audio_column); + + EXPECT_TRUE(result.contains("name")); + EXPECT_EQ(result["name"], "transcription_of_audio_review"); + EXPECT_TRUE(result.contains("data")); + EXPECT_TRUE(result["data"].is_array()); + EXPECT_EQ(result["data"].size(), 2); + EXPECT_EQ(result["data"][0], expected_transcription1); + EXPECT_EQ(result["data"][1], expected_transcription2); +} + +// Test TranscribeAudioColumn without name +TEST_F(TranscribeAudioColumnTest, TranscribeAudioColumnWithoutName) { + json audio_column = { + {"type", "audio"}, + {"transcription_model", "gpt-4o-transcribe"}, + {"data", {"https://example.com/audio.mp3"}}}; + + json expected_transcription = "{\"text\": \"Transcribed audio content\"}"; + + EXPECT_CALL(*mock_provider, AddTranscriptionRequest(::testing::_)) + .Times(1); + EXPECT_CALL(*mock_provider, CollectTranscriptions("multipart/form-data")) + .WillOnce(::testing::Return(std::vector{expected_transcription})); + + auto result = PromptManager::TranscribeAudioColumn(audio_column); + + EXPECT_TRUE(result.contains("name")); + EXPECT_EQ(result["name"], "transcription"); + EXPECT_TRUE(result.contains("data")); + EXPECT_TRUE(result["data"].is_array()); + EXPECT_EQ(result["data"].size(), 1); + EXPECT_EQ(result["data"][0], 
expected_transcription); +} + +// Test TranscribeAudioColumn with empty name +TEST_F(TranscribeAudioColumnTest, TranscribeAudioColumnWithEmptyName) { + json audio_column = { + {"name", ""}, + {"type", "audio"}, + {"transcription_model", "gpt-4o-transcribe"}, + {"data", {"https://example.com/audio.mp3"}}}; + + json expected_transcription = "{\"text\": \"Transcribed content\"}"; + + EXPECT_CALL(*mock_provider, AddTranscriptionRequest(::testing::_)) + .Times(1); + EXPECT_CALL(*mock_provider, CollectTranscriptions("multipart/form-data")) + .WillOnce(::testing::Return(std::vector{expected_transcription})); + + auto result = PromptManager::TranscribeAudioColumn(audio_column); + + EXPECT_TRUE(result.contains("name")); + EXPECT_EQ(result["name"], "transcription"); + EXPECT_TRUE(result.contains("data")); + EXPECT_EQ(result["data"].size(), 1); +} + +// Test TranscribeAudioColumn with single audio file +TEST_F(TranscribeAudioColumnTest, TranscribeAudioColumnSingleFile) { + json audio_column = { + {"name", "podcast"}, + {"type", "audio"}, + {"transcription_model", "gpt-4o-transcribe"}, + {"data", {"https://example.com/podcast.mp3"}}}; + + json expected_transcription = "{\"text\": \"Podcast transcription\"}"; + + EXPECT_CALL(*mock_provider, AddTranscriptionRequest(::testing::_)) + .Times(1); + EXPECT_CALL(*mock_provider, CollectTranscriptions("multipart/form-data")) + .WillOnce(::testing::Return(std::vector{expected_transcription})); + + auto result = PromptManager::TranscribeAudioColumn(audio_column); + + EXPECT_EQ(result["name"], "transcription_of_podcast"); + EXPECT_EQ(result["data"].size(), 1); + EXPECT_EQ(result["data"][0], expected_transcription); +} + +// Test TranscribeAudioColumn with multiple audio files +TEST_F(TranscribeAudioColumnTest, TranscribeAudioColumnMultipleFiles) { + json audio_column = { + {"name", "interviews"}, + {"type", "audio"}, + {"transcription_model", "gpt-4o-transcribe"}, + {"data", {"https://example.com/interview1.mp3", 
"https://example.com/interview2.mp3", "https://example.com/interview3.mp3"}}}; + + json expected_transcription1 = "{\"text\": \"First interview\"}"; + json expected_transcription2 = "{\"text\": \"Second interview\"}"; + json expected_transcription3 = "{\"text\": \"Third interview\"}"; + + EXPECT_CALL(*mock_provider, AddTranscriptionRequest(::testing::_)) + .Times(1); + EXPECT_CALL(*mock_provider, CollectTranscriptions("multipart/form-data")) + .WillOnce(::testing::Return(std::vector{expected_transcription1, expected_transcription2, expected_transcription3})); + + auto result = PromptManager::TranscribeAudioColumn(audio_column); + + EXPECT_EQ(result["name"], "transcription_of_interviews"); + EXPECT_EQ(result["data"].size(), 3); + EXPECT_EQ(result["data"][0], expected_transcription1); + EXPECT_EQ(result["data"][1], expected_transcription2); + EXPECT_EQ(result["data"][2], expected_transcription3); +} + +// Test TranscribeAudioColumn output format (JSON array) +TEST_F(TranscribeAudioColumnTest, TranscribeAudioColumnOutputFormat) { + json audio_column = { + {"name", "test_audio"}, + {"type", "audio"}, + {"transcription_model", "gpt-4o-transcribe"}, + {"data", {"https://example.com/audio.mp3"}}}; + + json expected_transcription = "{\"text\": \"Test transcription\"}"; + + EXPECT_CALL(*mock_provider, AddTranscriptionRequest(::testing::_)) + .Times(1); + EXPECT_CALL(*mock_provider, CollectTranscriptions("multipart/form-data")) + .WillOnce(::testing::Return(std::vector{expected_transcription})); + + auto result = PromptManager::TranscribeAudioColumn(audio_column); + + // Verify the result is a proper JSON object with name and data fields + EXPECT_TRUE(result.is_object()); + EXPECT_TRUE(result.contains("name")); + EXPECT_TRUE(result.contains("data")); + EXPECT_TRUE(result["data"].is_array()); + + // Verify data contains the transcription results + EXPECT_EQ(result["data"][0], expected_transcription); +} + }// namespace flock \ No newline at end of file From 
ff53e8f2f23691421f30ff3b5d603c52bbbdff6a Mon Sep 17 00:00:00 2001 From: Anas Dorbani Date: Wed, 10 Dec 2025 21:39:26 -0500 Subject: [PATCH 35/59] Added base64 encoding and regex URL detection to URL handler --- .../providers/handlers/url_handler.hpp | 109 +++++++++++++++++- 1 file changed, 107 insertions(+), 2 deletions(-) diff --git a/src/include/flock/model_manager/providers/handlers/url_handler.hpp b/src/include/flock/model_manager/providers/handlers/url_handler.hpp index 33965f62..e0944f66 100644 --- a/src/include/flock/model_manager/providers/handlers/url_handler.hpp +++ b/src/include/flock/model_manager/providers/handlers/url_handler.hpp @@ -4,6 +4,7 @@ #include #include #include +#include #include #include @@ -40,9 +41,11 @@ class URLHandler { return oss.str(); } - // Check if the given path is a URL + // Check if the given path is a URL using regex static bool IsUrl(const std::string& path) { - return path.find("http://") == 0 || path.find("https://") == 0; + // Regex pattern to match URLs: http:// or https:// + static const std::regex url_pattern(R"(^https?://)"); + return std::regex_search(path, url_pattern); } // Validate file exists and is not empty @@ -58,6 +61,7 @@ class URLHandler { } // Download file from URL to temporary location + // Supports http:// and https:// URLs static std::string DownloadFileToTemp(const std::string& url) { std::string extension = ExtractFileExtension(url); // If no extension found, try to infer from content-type or use empty extension @@ -126,6 +130,107 @@ class URLHandler { return result; } + + // Read file contents and convert to base64 + // Returns empty string if file cannot be read + static std::string ReadFileToBase64(const std::string& file_path) { + FILE* file = fopen(file_path.c_str(), "rb"); + if (!file) { + return ""; + } + + // Get file size + fseek(file, 0, SEEK_END); + long file_size = ftell(file); + fseek(file, 0, SEEK_SET); + + if (file_size <= 0) { + fclose(file); + return ""; + } + + // Read file content 
+ std::vector buffer(file_size); + size_t bytes_read = fread(buffer.data(), 1, file_size, file); + fclose(file); + + if (bytes_read != static_cast(file_size)) { + return ""; + } + + // Base64 encoding table + static const char base64_chars[] = + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; + + std::string result; + result.reserve(((file_size + 2) / 3) * 4); + + for (size_t i = 0; i < bytes_read; i += 3) { + unsigned int octet_a = buffer[i]; + unsigned int octet_b = (i + 1 < bytes_read) ? buffer[i + 1] : 0; + unsigned int octet_c = (i + 2 < bytes_read) ? buffer[i + 2] : 0; + + unsigned int triple = (octet_a << 16) + (octet_b << 8) + octet_c; + + result.push_back(base64_chars[(triple >> 18) & 0x3F]); + result.push_back(base64_chars[(triple >> 12) & 0x3F]); + result.push_back((i + 1 < bytes_read) ? base64_chars[(triple >> 6) & 0x3F] : '='); + result.push_back((i + 2 < bytes_read) ? base64_chars[triple & 0x3F] : '='); + } + + return result; + } + + // Helper struct to return base64 content and temp file flag + struct Base64Result { + std::string base64_content; + bool is_temp_file; + std::string temp_file_path; + }; + + // Resolve file path or URL, read contents and convert to base64 + // If input is URL, downloads to temp file first + // Returns base64 content and temp file info for cleanup + // Throws std::runtime_error if file cannot be processed + static Base64Result ResolveFileToBase64(const std::string& file_path_or_url) { + Base64Result result; + result.is_temp_file = false; + + std::string file_path; + if (IsUrl(file_path_or_url)) { + file_path = DownloadFileToTemp(file_path_or_url); + if (file_path.empty()) { + throw std::runtime_error("Failed to download file: " + file_path_or_url); + } + result.is_temp_file = true; + result.temp_file_path = file_path; + } else { + file_path = file_path_or_url; + } + + if (!ValidateFile(file_path)) { + if (result.is_temp_file) { + std::remove(file_path.c_str()); + } + throw std::runtime_error("Invalid 
file: " + file_path_or_url); + } + + result.base64_content = ReadFileToBase64(file_path); + if (result.base64_content.empty()) { + if (result.is_temp_file) { + std::remove(file_path.c_str()); + } + throw std::runtime_error("Failed to read file: " + file_path_or_url); + } + + // Cleanup temp file after reading + if (result.is_temp_file) { + std::remove(file_path.c_str()); + result.temp_file_path.clear(); + } + + return result; + } }; }// namespace flock From 8ae1f431ffbed2a7eabd13497232e40d8548d5ce Mon Sep 17 00:00:00 2001 From: Anas Dorbani Date: Wed, 10 Dec 2025 21:41:50 -0500 Subject: [PATCH 36/59] Improved null safety and type checking in prompt manager --- src/prompt_manager/prompt_manager.cpp | 39 ++++++++++++++++++++++----- 1 file changed, 33 insertions(+), 6 deletions(-) diff --git a/src/prompt_manager/prompt_manager.cpp b/src/prompt_manager/prompt_manager.cpp index 477d8a7a..8a4ff171 100644 --- a/src/prompt_manager/prompt_manager.cpp +++ b/src/prompt_manager/prompt_manager.cpp @@ -58,7 +58,12 @@ std::string PromptManager::ConstructInputTuplesHeaderXML(const nlohmann::json& c auto header = std::string("
"); auto column_idx = 1u; for (const auto& column: columns) { - auto column_name = column.contains("name") ? column["name"].get() : "COLUMN " + std::to_string(column_idx++); + std::string column_name; + if (column.contains("name") && column["name"].is_string()) { + column_name = column["name"].get(); + } else { + column_name = "COLUMN " + std::to_string(column_idx++); + } header += "" + column_name + ""; } header += "
\n"; @@ -72,7 +77,7 @@ std::string PromptManager::ConstructInputTuplesHeaderMarkdown(const nlohmann::js auto header = std::string(" | "); auto column_idx = 1u; for (const auto& column: columns) { - if (column.contains("name")) { + if (column.contains("name") && column["name"].is_string()) { header += "COLUMN_" + column["name"].get() + " | "; } else { header += "COLUMN " + std::to_string(column_idx++) + " | "; @@ -81,7 +86,12 @@ std::string PromptManager::ConstructInputTuplesHeaderMarkdown(const nlohmann::js header += "\n | "; column_idx = 1u; for (const auto& column: columns) { - auto column_name = column.contains("name") ? column["name"].get() : "COLUMN " + std::to_string(column_idx++); + std::string column_name; + if (column.contains("name") && column["name"].is_string()) { + column_name = column["name"].get(); + } else { + column_name = "COLUMN " + std::to_string(column_idx++); + } header += std::string(column_name.length(), '-') + " | "; } header += "\n"; @@ -97,7 +107,16 @@ std::string PromptManager::ConstructInputTuplesXML(const nlohmann::json& columns for (auto i = 0; i < static_cast(columns[0]["data"].size()); i++) { tuples_str += ""; for (const auto& column: columns) { - tuples_str += "" + column["data"][i].get() + ""; + std::string value_str; + const auto& data_item = column["data"][i]; + if (data_item.is_null()) { + value_str = ""; + } else if (data_item.is_string()) { + value_str = data_item.get(); + } else { + value_str = data_item.dump(); + } + tuples_str += "" + value_str + ""; } tuples_str += "\n"; } @@ -124,7 +143,12 @@ std::string PromptManager::ConstructInputTuplesJSON(const nlohmann::json& column auto tuples_json = nlohmann::json::object(); auto column_idx = 1u; for (const auto& column: columns) { - auto column_name = column.contains("name") ? 
column["name"].get() : "COLUMN " + std::to_string(column_idx++); + std::string column_name; + if (column.contains("name") && column["name"].is_string()) { + column_name = column["name"].get(); + } else { + column_name = "COLUMN " + std::to_string(column_idx++); + } tuples_json[column_name] = column["data"]; } auto tuples_str = tuples_json.dump(4); @@ -239,7 +263,10 @@ nlohmann::json PromptManager::TranscribeAudioColumn(const nlohmann::json& audio_ // Create transcription column with proper naming auto transcription_column = nlohmann::json::object(); - auto original_name = audio_column.contains("name") ? audio_column["name"].get() : ""; + std::string original_name; + if (audio_column.contains("name") && audio_column["name"].is_string()) { + original_name = audio_column["name"].get(); + } auto transcription_name = original_name.empty() ? "transcription" : "transcription_of_" + original_name; transcription_column["name"] = transcription_name; transcription_column["data"] = transcriptions; From 29ada0bc46455e6c24ba24683fb36862a9a150c6 Mon Sep 17 00:00:00 2001 From: Anas Dorbani Date: Wed, 10 Dec 2025 21:42:06 -0500 Subject: [PATCH 37/59] Enhanced error handling and null checks in base handler --- .../providers/handlers/base_handler.hpp | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/src/include/flock/model_manager/providers/handlers/base_handler.hpp b/src/include/flock/model_manager/providers/handlers/base_handler.hpp index 2aebb11c..59f48478 100644 --- a/src/include/flock/model_manager/providers/handlers/base_handler.hpp +++ b/src/include/flock/model_manager/providers/handlers/base_handler.hpp @@ -99,9 +99,15 @@ class BaseModelProviderHandler : public IModelProviderHandler { if (is_transcription) { // Handle transcription requests (multipart/form-data) const auto& req = jsons[i]; + if (!req.contains("file_path") || req["file_path"].is_null()) { + trigger_error("Missing or null file_path in transcription request"); + } + if 
(!req.contains("model") || req["model"].is_null()) { + trigger_error("Missing or null model in transcription request"); + } auto file_path = req["file_path"].get(); auto model = req["model"].get(); - auto prompt = req.contains("prompt") ? req["prompt"].get() : ""; + auto prompt = req.contains("prompt") && !req["prompt"].is_null() ? req["prompt"].get() : ""; requests[i].is_temp_file = req.contains("is_temp_file") ? req["is_temp_file"].get() : false; if (requests[i].is_temp_file) { requests[i].temp_file_path = file_path; @@ -184,6 +190,7 @@ class BaseModelProviderHandler : public IModelProviderHandler { } curl_easy_getinfo(requests[i].easy, CURLINFO_RESPONSE_CODE, NULL); + if (isJson(requests[i].response)) { try { nlohmann::json parsed = nlohmann::json::parse(requests[i].response); @@ -197,7 +204,11 @@ class BaseModelProviderHandler : public IModelProviderHandler { } // Let provider extract output based on request type - results[i] = ExtractOutput(parsed, request_type); + try { + results[i] = ExtractOutput(parsed, request_type); + } catch (const std::exception& e) { + trigger_error(std::string("Output extraction error: ") + e.what()); + } } catch (const std::exception& e) { trigger_error(std::string("Response processing error: ") + e.what()); } From e918a81b20c2ccf086ad1514ffd1a9557ff53b85 Mon Sep 17 00:00:00 2001 From: Anas Dorbani Date: Wed, 10 Dec 2025 21:42:16 -0500 Subject: [PATCH 38/59] Improved aggregate state initialization and metadata handling --- src/functions/aggregate/aggregate_state.cpp | 44 ++++++++++++++----- .../flock/functions/aggregate/aggregate.hpp | 16 +++---- 2 files changed, 39 insertions(+), 21 deletions(-) diff --git a/src/functions/aggregate/aggregate_state.cpp b/src/functions/aggregate/aggregate_state.cpp index 3f50d922..0f23338e 100644 --- a/src/functions/aggregate/aggregate_state.cpp +++ b/src/functions/aggregate/aggregate_state.cpp @@ -3,9 +3,10 @@ namespace flock { void AggregateFunctionState::Initialize() { - if (!value) { - value = 
new nlohmann::json(nlohmann::json::array()); - } + value = new nlohmann::json(nlohmann::json::array()); + model_details = nlohmann::json::object(); + user_query = ""; + initialized = true; } void AggregateFunctionState::Update(const nlohmann::json& input) { @@ -14,7 +15,7 @@ void AggregateFunctionState::Update(const nlohmann::json& input) { } auto idx = 0u; - for (auto& column: input) { + for (const auto& column: input) { if (value->size() <= idx) { value->push_back(nlohmann::json::object()); (*value)[idx]["data"] = nlohmann::json::array(); @@ -25,7 +26,10 @@ void AggregateFunctionState::Update(const nlohmann::json& input) { (*value)[idx]["data"].push_back(item_value); } } else { - (*value)[idx][item.key()] = item.value(); + // For metadata, only set if not already set + if (!(*value)[idx].contains(item.key())) { + (*value)[idx][item.key()] = item.value(); + } } } idx++; @@ -45,14 +49,31 @@ void AggregateFunctionState::Combine(const AggregateFunctionState& source) { if (source.value) { auto idx = 0u; - for (auto& column: *source.value) { + for (const auto& column: *source.value) { + // Ensure the target value array has enough elements + if (value->size() <= idx) { + value->push_back(nlohmann::json::object()); + } + + // Initialize data array if it doesn't exist + if (!(*value)[idx].contains("data")) { + (*value)[idx]["data"] = nlohmann::json::array(); + } + + // Merge column metadata - preserve existing, add new for (const auto& item: column.items()) { if (item.key() == "data") { - for (const auto& item_value: item.value()) { - (*value)[idx]["data"].push_back(item_value); + // Append data items + if (item.value().is_array()) { + for (const auto& item_value: item.value()) { + (*value)[idx]["data"].push_back(item_value); + } } } else { - (*value)[idx][item.key()] = item.value(); + // For metadata (name, type, etc), only set if not already set + if (!(*value)[idx].contains(item.key())) { + (*value)[idx][item.key()] = item.value(); + } } } idx++; @@ -61,11 +82,14 @@ 
void AggregateFunctionState::Combine(const AggregateFunctionState& source) { } void AggregateFunctionState::Destroy() { + initialized = false; if (value) { delete value; value = nullptr; } - initialized = false; + model_details = nlohmann::json::object(); + user_query.clear(); + user_query.shrink_to_fit(); } }// namespace flock diff --git a/src/include/flock/functions/aggregate/aggregate.hpp b/src/include/flock/functions/aggregate/aggregate.hpp index 3b341d20..1f3963e2 100644 --- a/src/include/flock/functions/aggregate/aggregate.hpp +++ b/src/include/flock/functions/aggregate/aggregate.hpp @@ -51,12 +51,11 @@ class AggregateFunctionBase { auto state = reinterpret_cast(state_p); // Use placement new to properly construct the AggregateFunctionState object + // This handles memory allocation done by DuckDB new (state) AggregateFunctionState(); - if (!state->initialized) { - state->Initialize(); - state->initialized = true; - } + // Initialize the state (allocates JSON array, resets all fields) + state->Initialize(); } template @@ -140,7 +139,6 @@ class AggregateFunctionBase { auto* state = state_vector[i]; if (state) { state->Destroy(); - state->~AggregateFunctionState();// Explicitly call destructor } } } @@ -151,13 +149,9 @@ class AggregateFunctionBase { template static void FinalizeSafe(duckdb::Vector& states, duckdb::AggregateInputData& aggr_input_data, duckdb::Vector& result, idx_t count, idx_t offset) { - const auto states_vector = reinterpret_cast(duckdb::FlatVector::GetData(states)); - for (idx_t i = 0; i < count; i++) { - auto idx = i + offset; - auto* state = states_vector[idx]; - - result.SetValue(idx, "[]");// Empty JSON array as default + auto result_idx = i + offset; + result.SetValue(result_idx, "[]"); } } }; From 285ab8d8da5af6fcbf2a8b26072e157014efb3a8 Mon Sep 17 00:00:00 2001 From: Anas Dorbani Date: Wed, 10 Dec 2025 21:42:47 -0500 Subject: [PATCH 39/59] Added null checks for transcription output in Azure and OpenAI handlers --- 
src/include/flock/model_manager/providers/handlers/azure.hpp | 2 +- src/include/flock/model_manager/providers/handlers/openai.hpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/include/flock/model_manager/providers/handlers/azure.hpp b/src/include/flock/model_manager/providers/handlers/azure.hpp index 3fa34efa..16027c4a 100644 --- a/src/include/flock/model_manager/providers/handlers/azure.hpp +++ b/src/include/flock/model_manager/providers/handlers/azure.hpp @@ -91,7 +91,7 @@ class AzureModelManager : public BaseModelProviderHandler { nlohmann::json ExtractTranscriptionOutput(const nlohmann::json& response) const override { // Transcription API returns JSON with "text" field when response_format=json - if (response.contains("text")) { + if (response.contains("text") && !response["text"].is_null()) { return response["text"].get(); } return ""; diff --git a/src/include/flock/model_manager/providers/handlers/openai.hpp b/src/include/flock/model_manager/providers/handlers/openai.hpp index 064ba08c..8162e341 100644 --- a/src/include/flock/model_manager/providers/handlers/openai.hpp +++ b/src/include/flock/model_manager/providers/handlers/openai.hpp @@ -112,7 +112,7 @@ class OpenAIModelManager : public BaseModelProviderHandler { nlohmann::json ExtractTranscriptionOutput(const nlohmann::json& response) const override { // Transcription API returns JSON with "text" field when response_format=json - if (response.contains("text")) { + if (response.contains("text") && !response["text"].is_null()) { return response["text"].get(); } return ""; From 1e61924b7b9ed57f2dc81398ab751794479fa7b8 Mon Sep 17 00:00:00 2001 From: Anas Dorbani Date: Wed, 10 Dec 2025 21:43:04 -0500 Subject: [PATCH 40/59] Updated Ollama handler to use chat API and improved response parsing --- .../providers/handlers/ollama.hpp | 47 ++++++++++++++++--- .../providers/adapters/azure.cpp | 20 ++++++-- .../providers/adapters/ollama.cpp | 31 +++++++++--- .../providers/adapters/openai.cpp 
| 31 ++++++++++-- 4 files changed, 109 insertions(+), 20 deletions(-) diff --git a/src/include/flock/model_manager/providers/handlers/ollama.hpp b/src/include/flock/model_manager/providers/handlers/ollama.hpp index 27cf9f6d..51b88e1e 100644 --- a/src/include/flock/model_manager/providers/handlers/ollama.hpp +++ b/src/include/flock/model_manager/providers/handlers/ollama.hpp @@ -18,7 +18,7 @@ class OllamaModelManager : public BaseModelProviderHandler { OllamaModelManager& operator=(OllamaModelManager&&) = delete; protected: - std::string getCompletionUrl() const override { return _url + "/api/generate"; } + std::string getCompletionUrl() const override { return _url + "/api/chat"; } std::string getEmbedUrl() const override { return _url + "/api/embed"; } std::string getTranscriptionUrl() const override { return ""; } void prepareSessionForRequest(const std::string& url) override { _session.setUrl(url); } @@ -36,10 +36,12 @@ class OllamaModelManager : public BaseModelProviderHandler { } bool is_completion = (request_type == RequestType::Completion); if (is_completion) { - if ((response.contains("done_reason") && response["done_reason"] != "stop") || - (response.contains("done") && !response["done"].is_null() && response["done"].get() != true)) { + if (response.contains("done_reason") && response["done_reason"] != "stop") { throw std::runtime_error("The request was refused due to some internal error with Ollama API"); } + if (response.contains("done") && !response["done"].is_null() && !response["done"].get()) { + throw std::runtime_error("The request was not completed by Ollama API"); + } } else { if (response.contains("embeddings") && (!response["embeddings"].is_array() || response["embeddings"].empty())) { throw std::runtime_error("Ollama API returned empty or invalid embedding data."); @@ -48,10 +50,43 @@ class OllamaModelManager : public BaseModelProviderHandler { } nlohmann::json ExtractCompletionOutput(const nlohmann::json& response) const override { - if 
(response.contains("response")) { - return nlohmann::json::parse(response["response"].get()); + if (response.contains("message") && response["message"].is_object()) { + const auto& message = response["message"]; + if (message.contains("content")) { + const auto& content = message["content"]; + if (content.is_null()) { + std::cerr << "Error: Ollama API returned null content in message. Full response: " << response.dump(2) << std::endl; + throw std::runtime_error("Ollama API returned null content in message. Response: " + response.dump()); + } + if (content.is_string()) { + try { + auto parsed = nlohmann::json::parse(content.get()); + // Validate that parsed result has expected structure for aggregate functions + if (!parsed.contains("items") || !parsed["items"].is_array()) { + std::cerr << "Warning: Parsed content does not contain 'items' array. Parsed: " << parsed.dump(2) << std::endl; + } + return parsed; + } catch (const std::exception& e) { + std::cerr << "Error: Failed to parse Ollama response content as JSON: " << e.what() << std::endl; + std::cerr << "Content was: " << content.dump() << std::endl; + throw std::runtime_error("Failed to parse Ollama response as JSON: " + std::string(e.what()) + ". Content: " + content.dump()); + } + } else { + // Content might already be a JSON object + // Validate structure + if (!content.contains("items") || !content["items"].is_array()) { + std::cerr << "Warning: Content does not contain 'items' array. Content: " << content.dump(2) << std::endl; + } + return content; + } + } else { + std::cerr << "Error: Ollama API response missing 'content' field in message. Full response: " << response.dump(2) << std::endl; + throw std::runtime_error("Ollama API response missing message.content field. Response: " + response.dump()); + } + } else { + std::cerr << "Error: Ollama API response missing 'message' object. Full response: " << response.dump(2) << std::endl; + throw std::runtime_error("Ollama API response missing message field. 
Response: " + response.dump()); } - return {}; } nlohmann::json ExtractEmbeddingVector(const nlohmann::json& response) const override { diff --git a/src/model_manager/providers/adapters/azure.cpp b/src/model_manager/providers/adapters/azure.cpp index 753076e9..9a2f4ab5 100644 --- a/src/model_manager/providers/adapters/azure.cpp +++ b/src/model_manager/providers/adapters/azure.cpp @@ -31,14 +31,28 @@ void AzureProvider::AddCompletionRequest(const std::string& prompt, const int nu {"text", "ATTACHMENT COLUMN"}}); auto row_index = 1u; for (const auto& image: column["data"]) { + // Skip null values + if (image.is_null()) { + continue; + } message_content.push_back( {{"type", "text"}, {"text", "ROW " + std::to_string(row_index) + " :"}}); auto image_url = std::string(); - auto image_str = image.get(); - if (is_base64(image_str)) { - image_url = duckdb_fmt::format("data:{};base64,{}", mime_type, image_str); + std::string image_str; + if (image.is_string()) { + image_str = image.get(); } else { + image_str = image.dump(); + } + + // Handle file path or URL + if (URLHandler::IsUrl(image_str)) { + // URL - send directly to API image_url = image_str; + } else { + // File path - read and convert to base64 + auto base64_result = URLHandler::ResolveFileToBase64(image_str); + image_url = duckdb_fmt::format("data:{};base64,{}", mime_type, base64_result.base64_content); } message_content.push_back( diff --git a/src/model_manager/providers/adapters/ollama.cpp b/src/model_manager/providers/adapters/ollama.cpp index 07b619d8..0311fdd7 100644 --- a/src/model_manager/providers/adapters/ollama.cpp +++ b/src/model_manager/providers/adapters/ollama.cpp @@ -1,29 +1,46 @@ #include "flock/model_manager/providers/adapters/ollama.hpp" #include "flock/model_manager/providers/handlers/url_handler.hpp" +#include "flock/model_manager/providers/provider.hpp" namespace flock { void OllamaProvider::AddCompletionRequest(const std::string& prompt, const int num_output_tuples, OutputType output_type, 
const nlohmann::json& media_data) { - nlohmann::json request_payload = {{"model", model_details_.model}, - {"prompt", prompt}, - {"stream", false}}; + // Build message for chat API + nlohmann::json message = {{"role", "user"}, {"content", prompt}}; + // Process image columns - images go in the message object as an "images" array auto images = nlohmann::json::array(); - // Process image columns if (media_data.contains("image") && !media_data["image"].empty() && media_data["image"].is_array()) { for (const auto& column: media_data["image"]) { if (column.contains("data") && column["data"].is_array()) { for (const auto& image: column["data"]) { - auto image_str = image.get(); - images.push_back(image_str); + // Skip null values + if (image.is_null()) { + continue; + } + std::string image_str; + if (image.is_string()) { + image_str = image.get(); + } else { + // Convert non-string values to string + image_str = image.dump(); + } + + // Handle file path or URL - resolve and convert to base64 + auto base64_result = URLHandler::ResolveFileToBase64(image_str); + images.push_back(base64_result.base64_content); } } } } if (!images.empty()) { - request_payload["images"] = images; + message["images"] = images; } + nlohmann::json request_payload = {{"model", model_details_.model}, + {"messages", nlohmann::json::array({message})}, + {"stream", false}}; + if (!model_details_.model_parameters.empty()) { request_payload.update(model_details_.model_parameters); } diff --git a/src/model_manager/providers/adapters/openai.cpp b/src/model_manager/providers/adapters/openai.cpp index 1eb14fc5..15907bab 100644 --- a/src/model_manager/providers/adapters/openai.cpp +++ b/src/model_manager/providers/adapters/openai.cpp @@ -31,14 +31,28 @@ void OpenAIProvider::AddCompletionRequest(const std::string& prompt, const int n {"text", "ATTACHMENT COLUMN"}}); auto row_index = 1u; for (const auto& image: column["data"]) { + // Skip null values + if (image.is_null()) { + continue; + } 
message_content.push_back( {{"type", "text"}, {"text", "ROW " + std::to_string(row_index) + " :"}}); auto image_url = std::string(); - auto image_str = image.get(); - if (is_base64(image_str)) { - image_url = duckdb_fmt::format("data:{};base64,{}", mime_type, image_str); + std::string image_str; + if (image.is_string()) { + image_str = image.get(); } else { + image_str = image.dump(); + } + + // Handle file path or URL + if (URLHandler::IsUrl(image_str)) { + // URL - send directly to API image_url = image_str; + } else { + // File path - read and convert to base64 + auto base64_result = URLHandler::ResolveFileToBase64(image_str); + image_url = duckdb_fmt::format("data:{};base64,{}", mime_type, base64_result.base64_content); } message_content.push_back( @@ -90,7 +104,16 @@ void OpenAIProvider::AddEmbeddingRequest(const std::vector& inputs) void OpenAIProvider::AddTranscriptionRequest(const nlohmann::json& audio_files) { for (const auto& audio_file: audio_files) { - auto audio_file_str = audio_file.get(); + // Skip null values + if (audio_file.is_null()) { + continue; + } + std::string audio_file_str; + if (audio_file.is_string()) { + audio_file_str = audio_file.get(); + } else { + audio_file_str = audio_file.dump(); + } // Handle file download and validation auto file_result = URLHandler::ResolveFilePath(audio_file_str); From 45cf50f846612d2be88a2162a6b46fd24f159225 Mon Sep 17 00:00:00 2001 From: Anas Dorbani Date: Wed, 10 Dec 2025 21:44:08 -0500 Subject: [PATCH 41/59] Updated scalar functions to use unique ID generation for metrics tracking --- .../llm_first_or_last/implementation.cpp | 86 ++++++++++-- .../aggregate/llm_reduce/implementation.cpp | 11 +- .../aggregate/llm_rerank/implementation.cpp | 131 ++++++++++++++++-- .../scalar/llm_complete/implementation.cpp | 6 +- .../scalar/llm_embedding/implementation.cpp | 6 +- .../scalar/llm_filter/implementation.cpp | 6 +- src/functions/scalar/scalar.cpp | 1 - src/include/flock/metrics/manager.hpp | 7 + 8 files changed, 
218 insertions(+), 36 deletions(-) diff --git a/src/functions/aggregate/llm_first_or_last/implementation.cpp b/src/functions/aggregate/llm_first_or_last/implementation.cpp index 3f249037..08fbc651 100644 --- a/src/functions/aggregate/llm_first_or_last/implementation.cpp +++ b/src/functions/aggregate/llm_first_or_last/implementation.cpp @@ -3,23 +3,88 @@ #include "flock/metrics/manager.hpp" #include +#include +#include #include namespace flock { int LlmFirstOrLast::GetFirstOrLastTupleId(nlohmann::json& tuples) { - nlohmann::json data; const auto [prompt, media_data] = PromptManager::Render(user_query, tuples, function_type, model.GetModelDetails().tuple_format); model.AddCompletionRequest(prompt, 1, OutputType::INTEGER, media_data); auto response = model.CollectCompletions()[0]; - return response["items"][0]; + + // Find flock_row_id column to get valid IDs + std::set valid_ids; + for (const auto& column: tuples) { + if (column.contains("name") && column["name"].is_string() && + column["name"].get() == "flock_row_id" && + column.contains("data") && column["data"].is_array()) { + for (const auto& id: column["data"]) { + if (id.is_string()) { + valid_ids.insert(id.get()); + } + } + break; + } + } + + // Get LLM response - can be integer or string + int result_id_int = -1; + std::string result_id_str; + if (response["items"][0].is_number_integer()) { + result_id_int = response["items"][0].get(); + result_id_str = std::to_string(result_id_int); + } else if (response["items"][0].is_string()) { + result_id_str = response["items"][0].get(); + try { + result_id_int = std::stoi(result_id_str); + } catch (...) 
{ + throw std::runtime_error( + "Invalid LLM response: The LLM returned ID '" + result_id_str + + "' which is not a valid flock_row_id."); + } + } else { + throw std::runtime_error( + "Invalid LLM response: Expected integer or string ID, got: " + response["items"][0].dump()); + } + + // Validate that the ID exists in flock_row_id + if (valid_ids.find(result_id_str) == valid_ids.end()) { + throw std::runtime_error( + "Invalid LLM response: The LLM returned ID '" + result_id_str + + "' which is not a valid flock_row_id."); + } + + return result_id_int; } nlohmann::json LlmFirstOrLast::Evaluate(nlohmann::json& tuples) { + int num_tuples = static_cast(tuples[0]["data"].size()); + + // If there's only 1 tuple, no need to call the LLM - just return it + if (num_tuples <= 1) { + auto result = nlohmann::json::array(); + for (auto i = 0; i < static_cast(tuples.size()) - 1; i++) { + result.push_back(nlohmann::json::object()); + for (const auto& item: tuples[i].items()) { + if (item.key() == "data") { + result[i]["data"] = nlohmann::json::array(); + if (!item.value().empty()) { + result[i]["data"].push_back(item.value()[0]); + } + } else { + result[i][item.key()] = item.value(); + } + } + } + return result; + } + auto batch_tuples = nlohmann::json::array(); int start_index = 0; model = Model(model_details); - auto batch_size = std::min(model.GetModelDetails().batch_size, static_cast(tuples[0]["data"].size())); + auto batch_size = std::min(model.GetModelDetails().batch_size, num_tuples); if (batch_size <= 0) { throw std::runtime_error("Batch size must be greater than zero"); @@ -51,7 +116,8 @@ nlohmann::json LlmFirstOrLast::Evaluate(nlohmann::json& tuples) { auto result_idx = GetFirstOrLastTupleId(batch_tuples); batch_tuples.clear(); - for (auto i = 0; i < static_cast(tuples.size()); i++) { + // Build result excluding flock_row_id column (last column) + for (auto i = 0; i < static_cast(tuples.size()) - 1; i++) { batch_tuples.push_back(nlohmann::json::object()); for (const 
auto& item: tuples[i].items()) { if (item.key() == "data") { @@ -72,8 +138,6 @@ nlohmann::json LlmFirstOrLast::Evaluate(nlohmann::json& tuples) { } while (start_index < static_cast(tuples[0]["data"].size())); - batch_tuples.erase(batch_tuples.end() - 1); - return batch_tuples; } @@ -92,10 +156,10 @@ void LlmFirstOrLast::FinalizeResults(duckdb::Vector& states, duckdb::AggregateIn // Process each state individually for (idx_t i = 0; i < count; i++) { - auto idx = i + offset; - auto* state = states_vector[idx]; + auto result_idx = i + offset; + auto* state = states_vector[i]; - if (state && !state->value->empty()) { + if (state && state->value && !state->value->empty()) { // Use model_details and user_query from the state (not static variables) Model model(state->model_details); auto model_details_obj = model.GetModelDetails(); @@ -135,9 +199,9 @@ void LlmFirstOrLast::FinalizeResults(duckdb::Vector& states, duckdb::AggregateIn double exec_duration_ms = std::chrono::duration(exec_end - exec_start).count(); MetricsManager::AddExecutionTime(exec_duration_ms); - result.SetValue(idx, response.dump()); + result.SetValue(result_idx, response.dump()); } else { - result.SetValue(idx, nullptr); + result.SetValue(result_idx, nullptr); } } diff --git a/src/functions/aggregate/llm_reduce/implementation.cpp b/src/functions/aggregate/llm_reduce/implementation.cpp index 815868a5..e43f6232 100644 --- a/src/functions/aggregate/llm_reduce/implementation.cpp +++ b/src/functions/aggregate/llm_reduce/implementation.cpp @@ -8,7 +8,6 @@ namespace flock { nlohmann::json LlmReduce::ReduceBatch(nlohmann::json& tuples, const AggregateFunctionType& function_type, const nlohmann::json& summary) { - nlohmann::json data; auto [prompt, media_data] = PromptManager::Render(user_query, tuples, function_type, model.GetModelDetails().tuple_format); prompt += "\n\n" + summary.dump(4); @@ -78,8 +77,8 @@ void LlmReduce::FinalizeResults(duckdb::Vector& states, duckdb::AggregateInputDa // Process each state 
individually for (idx_t i = 0; i < count; i++) { - auto idx = i + offset; - auto* state = states_vector[idx]; + auto result_idx = i + offset; + auto* state = states_vector[i]; if (state && state->value && !state->value->empty()) { // Use model_details and user_query from the state @@ -112,12 +111,12 @@ void LlmReduce::FinalizeResults(duckdb::Vector& states, duckdb::AggregateInputDa MetricsManager::AddExecutionTime(exec_duration_ms); if (response.is_string()) { - result.SetValue(idx, response.get()); + result.SetValue(result_idx, response.get()); } else { - result.SetValue(idx, response.dump()); + result.SetValue(result_idx, response.dump()); } } else { - result.SetValue(idx, nullptr); + result.SetValue(result_idx, nullptr); } } diff --git a/src/functions/aggregate/llm_rerank/implementation.cpp b/src/functions/aggregate/llm_rerank/implementation.cpp index e6a756e2..633b31a5 100644 --- a/src/functions/aggregate/llm_rerank/implementation.cpp +++ b/src/functions/aggregate/llm_rerank/implementation.cpp @@ -3,21 +3,101 @@ #include "flock/metrics/manager.hpp" #include +#include #include namespace flock { std::vector LlmRerank::RerankBatch(const nlohmann::json& tuples) { - nlohmann::json data; auto [prompt, media_data] = PromptManager::Render(user_query, tuples, AggregateFunctionType::RERANK, model.GetModelDetails().tuple_format); - model.AddCompletionRequest(prompt, static_cast(tuples[0]["data"].size()), OutputType::INTEGER, media_data); + + int num_tuples = static_cast(tuples[0]["data"].size()); + + model.AddCompletionRequest(prompt, num_tuples, OutputType::INTEGER, media_data); auto responses = model.CollectCompletions(); - return responses[0]["items"]; + + // Find flock_row_id column to get valid IDs + std::set valid_ids; + for (const auto& column: tuples) { + if (column.contains("name") && column["name"].is_string() && + column["name"].get() == "flock_row_id" && + column.contains("data") && column["data"].is_array()) { + for (const auto& id: column["data"]) { + if 
(id.is_string()) { + valid_ids.insert(id.get()); + } + } + break; + } + } + + std::vector indices; + std::set seen_ids; + + for (const auto& item: responses[0]["items"]) { + std::string id_str; + int id_int = -1; + + // Handle both integer and string responses + if (item.is_number_integer()) { + id_int = item.get(); + id_str = std::to_string(id_int); + } else if (item.is_string()) { + id_str = item.get(); + try { + id_int = std::stoi(id_str); + } catch (...) { + throw std::runtime_error( + "Invalid LLM response: The LLM returned ID '" + id_str + + "' which is not a valid flock_row_id."); + } + } else { + throw std::runtime_error( + "Invalid LLM response: Expected integer or string ID, got: " + item.dump()); + } + + // Validate that the ID exists in flock_row_id + if (valid_ids.find(id_str) == valid_ids.end()) { + throw std::runtime_error( + "Invalid LLM response: The LLM returned ID '" + id_str + + "' which is not a valid flock_row_id."); + } + + // Check for duplicates + if (seen_ids.count(id_str) > 0) { + throw std::runtime_error( + "Invalid LLM response: The LLM returned duplicate ID '" + id_str + "'."); + } + seen_ids.insert(id_str); + indices.push_back(id_int); + } + + return indices; }; nlohmann::json LlmRerank::SlidingWindow(nlohmann::json& tuples) { const auto num_tuples = static_cast(tuples[0]["data"].size()); + + // If there's only 1 tuple, no need to call the LLM - just return it + if (num_tuples <= 1) { + auto result = nlohmann::json::array(); + for (auto i = 0; i < static_cast(tuples.size()); i++) { + result.push_back(nlohmann::json::object()); + for (const auto& item: tuples[i].items()) { + if (item.key() == "data") { + result[i]["data"] = nlohmann::json::array(); + if (!item.value().empty()) { + result[i]["data"].push_back(item.value()[0]); + } + } else { + result[i][item.key()] = item.value(); + } + } + } + return result; + } + auto final_ranked_tuples = nlohmann::json::array(); auto carry_forward_tuples = nlohmann::json::array(); auto start_index = 
0; @@ -36,7 +116,8 @@ nlohmann::json LlmRerank::SlidingWindow(nlohmann::json& tuples) { auto window_tuples = carry_forward_tuples; // Then add new tuples up to batch_size - auto remaining_space = batch_size - static_cast(window_tuples[0]["data"].size()); + // Handle case where carry_forward_tuples is empty (first iteration) + auto remaining_space = window_tuples.empty() ? batch_size : (batch_size - static_cast(window_tuples[0]["data"].size())); auto end_index = std::min(start_index + remaining_space, num_tuples); for (auto i = 0; i < static_cast(tuples.size()); i++) { if (i >= static_cast(window_tuples.size())) { @@ -59,6 +140,11 @@ nlohmann::json LlmRerank::SlidingWindow(nlohmann::json& tuples) { // Clear carry forward for next iteration carry_forward_tuples.clear(); + // Skip if window_tuples is empty (shouldn't happen, but safety check) + if (window_tuples.empty() || window_tuples[0]["data"].empty()) { + continue; + } + try { auto indexed_tuples = window_tuples; indexed_tuples.push_back(nlohmann::json::object()); @@ -72,6 +158,20 @@ nlohmann::json LlmRerank::SlidingWindow(nlohmann::json& tuples) { auto ranked_indices = RerankBatch(indexed_tuples); + // Initialize final_ranked_tuples structure if needed (first time adding results) + if (final_ranked_tuples.empty() && !window_tuples.empty()) { + for (auto i = 0u; i < window_tuples.size(); i++) { + final_ranked_tuples.push_back(nlohmann::json::object()); + // Copy metadata from window_tuples + for (const auto& item: window_tuples[i].items()) { + if (item.key() != "data") { + final_ranked_tuples[i][item.key()] = item.value(); + } + } + final_ranked_tuples[i]["data"] = nlohmann::json::array(); + } + } + // Add the bottom half to final results (they won't be re-ranked) auto half_batch = static_cast(ranked_indices.size()) / 2; for (auto i = half_batch; i < static_cast(ranked_indices.size()); i++) { @@ -83,6 +183,19 @@ nlohmann::json LlmRerank::SlidingWindow(nlohmann::json& tuples) { } // Carry forward top half to next 
batch for re-ranking + // Initialize carry_forward_tuples structure if needed + if (carry_forward_tuples.empty() && !window_tuples.empty()) { + for (auto i = 0u; i < window_tuples.size(); i++) { + carry_forward_tuples.push_back(nlohmann::json::object()); + // Copy metadata from window_tuples + for (const auto& item: window_tuples[i].items()) { + if (item.key() != "data") { + carry_forward_tuples[i][item.key()] = item.value(); + } + } + carry_forward_tuples[i]["data"] = nlohmann::json::array(); + } + } for (auto i = 0; i < half_batch; i++) { auto idx = 0u; for (auto& column: window_tuples) { @@ -128,10 +241,10 @@ void LlmRerank::Finalize(duckdb::Vector& states, duckdb::AggregateInputData& agg // Process each state individually for (idx_t i = 0; i < count; i++) { - auto idx = i + offset; - auto* state = states_vector[idx]; + auto result_idx = i + offset; + auto* state = states_vector[i]; - if (state && !state->value->empty()) { + if (state && state->value && !state->value->empty()) { // Use model_details and user_query from the state (not static variables) Model model(state->model_details); auto model_details_obj = model.GetModelDetails(); @@ -165,9 +278,9 @@ void LlmRerank::Finalize(duckdb::Vector& states, duckdb::AggregateInputData& agg double exec_duration_ms = std::chrono::duration(exec_end - exec_start).count(); MetricsManager::AddExecutionTime(exec_duration_ms); - result.SetValue(idx, reranked_tuples.dump()); + result.SetValue(result_idx, reranked_tuples.dump()); } else { - result.SetValue(idx, nullptr); + result.SetValue(result_idx, nullptr); } } diff --git a/src/functions/scalar/llm_complete/implementation.cpp b/src/functions/scalar/llm_complete/implementation.cpp index f4da88ea..5055d64e 100644 --- a/src/functions/scalar/llm_complete/implementation.cpp +++ b/src/functions/scalar/llm_complete/implementation.cpp @@ -71,13 +71,13 @@ std::vector LlmComplete::Operation(duckdb::DataChunk& args) { } void LlmComplete::Execute(duckdb::DataChunk& args, 
duckdb::ExpressionState& state, duckdb::Vector& result) { - // Get database instance and state ID for metrics + // Get database instance and generate unique ID for metrics auto& context = state.GetContext(); auto* db = context.db.get(); - const void* state_id = static_cast(&state); + const void* invocation_id = MetricsManager::GenerateUniqueId(); // Start metrics tracking - MetricsManager::StartInvocation(db, state_id, FunctionType::LLM_COMPLETE); + MetricsManager::StartInvocation(db, invocation_id, FunctionType::LLM_COMPLETE); // Start execution timing auto exec_start = std::chrono::high_resolution_clock::now(); diff --git a/src/functions/scalar/llm_embedding/implementation.cpp b/src/functions/scalar/llm_embedding/implementation.cpp index 38dde7e1..fa30e428 100644 --- a/src/functions/scalar/llm_embedding/implementation.cpp +++ b/src/functions/scalar/llm_embedding/implementation.cpp @@ -79,13 +79,13 @@ std::vector> LlmEmbedding::Operation(duckdb::DataC } void LlmEmbedding::Execute(duckdb::DataChunk& args, duckdb::ExpressionState& state, duckdb::Vector& result) { - // Get database instance and state ID for metrics + // Get database instance and generate unique ID for metrics auto& context = state.GetContext(); auto* db = context.db.get(); - const void* state_id = static_cast(&state); + const void* invocation_id = MetricsManager::GenerateUniqueId(); // Start metrics tracking - MetricsManager::StartInvocation(db, state_id, FunctionType::LLM_EMBEDDING); + MetricsManager::StartInvocation(db, invocation_id, FunctionType::LLM_EMBEDDING); auto exec_start = std::chrono::high_resolution_clock::now(); diff --git a/src/functions/scalar/llm_filter/implementation.cpp b/src/functions/scalar/llm_filter/implementation.cpp index fbd8640d..bd6c8189 100644 --- a/src/functions/scalar/llm_filter/implementation.cpp +++ b/src/functions/scalar/llm_filter/implementation.cpp @@ -57,13 +57,13 @@ std::vector LlmFilter::Operation(duckdb::DataChunk& args) { } void 
LlmFilter::Execute(duckdb::DataChunk& args, duckdb::ExpressionState& state, duckdb::Vector& result) { - // Get database instance and state ID for metrics + // Get database instance and generate unique ID for metrics auto& context = state.GetContext(); auto* db = context.db.get(); - const void* state_id = static_cast(&state); + const void* invocation_id = MetricsManager::GenerateUniqueId(); // Start metrics tracking - MetricsManager::StartInvocation(db, state_id, FunctionType::LLM_FILTER); + MetricsManager::StartInvocation(db, invocation_id, FunctionType::LLM_FILTER); auto exec_start = std::chrono::high_resolution_clock::now(); diff --git a/src/functions/scalar/scalar.cpp b/src/functions/scalar/scalar.cpp index b2861327..6b9335ee 100644 --- a/src/functions/scalar/scalar.cpp +++ b/src/functions/scalar/scalar.cpp @@ -4,7 +4,6 @@ namespace flock { nlohmann::json ScalarFunctionBase::Complete(nlohmann::json& columns, const std::string& user_prompt, ScalarFunctionType function_type, Model& model) { - nlohmann::json data; const auto [prompt, media_data] = PromptManager::Render(user_prompt, columns, function_type, model.GetModelDetails().tuple_format); OutputType output_type = OutputType::STRING; if (function_type == ScalarFunctionType::FILTER) { diff --git a/src/include/flock/metrics/manager.hpp b/src/include/flock/metrics/manager.hpp index 629c5ff7..cb71a350 100644 --- a/src/include/flock/metrics/manager.hpp +++ b/src/include/flock/metrics/manager.hpp @@ -4,6 +4,7 @@ #include "duckdb/main/database.hpp" #include "flock/metrics/base_manager.hpp" #include "flock/metrics/types.hpp" +#include #include #include @@ -30,6 +31,12 @@ class MetricsManager : public BaseMetricsManager { return *it->second; } + // Generate a unique invocation ID for scalar functions + static const void* GenerateUniqueId() { + static std::atomic counter{0}; + return reinterpret_cast(++counter); + } + // Initialize metrics tracking (stores context for subsequent calls) static void 
StartInvocation(duckdb::DatabaseInstance* db, const void* state_id, FunctionType type) { if (db != nullptr && state_id != nullptr) { From 6f02270ef285389b00c16ca8d2e5ff6152f9ae0b Mon Sep 17 00:00:00 2001 From: Anas Dorbani Date: Wed, 10 Dec 2025 21:44:25 -0500 Subject: [PATCH 42/59] Updated integration test configuration and database setup --- test/integration/src/integration/conftest.py | 80 ++++++++++++++----- .../src/integration/setup_test_db.py | 41 +--------- 2 files changed, 60 insertions(+), 61 deletions(-) diff --git a/test/integration/src/integration/conftest.py b/test/integration/src/integration/conftest.py index eff66a20..2e8426af 100644 --- a/test/integration/src/integration/conftest.py +++ b/test/integration/src/integration/conftest.py @@ -3,12 +3,32 @@ import pytest from pathlib import Path from dotenv import load_dotenv -import base64 -import requests from integration.setup_test_db import setup_test_db load_dotenv() +TEST_AUDIO_FILE_PATH = Path(__file__).parent / "tests" / "flock_test_audio.mp3" + + +def get_audio_file_path(): + return str(TEST_AUDIO_FILE_PATH.resolve()) + + +def get_secrets_setup_sql(): + openai_key = os.getenv("OPENAI_API_KEY", "") + ollama_url = os.getenv("API_URL", "http://localhost:11434") + + secrets_sql = [] + + if openai_key: + secrets_sql.append(f"CREATE SECRET (TYPE OPENAI, API_KEY '{openai_key}');") + + if ollama_url: + secrets_sql.append(f"CREATE SECRET (TYPE OLLAMA, API_URL '{ollama_url}');") + + return " ".join(secrets_sql) + + @pytest.fixture(scope="session") def integration_setup(tmp_path_factory): duckdb_cli_path = os.getenv("DUCKDB_CLI_PATH", "duckdb") @@ -25,33 +45,49 @@ def integration_setup(tmp_path_factory): if os.path.exists(test_db_path): os.remove(test_db_path) -def run_cli(duckdb_cli_path, db_path, query): - return subprocess.run( + +def run_cli(duckdb_cli_path, db_path, query, with_secrets=True): + if with_secrets: + secrets_sql = get_secrets_setup_sql() + if secrets_sql: + query = f"{secrets_sql} {query}" 
+ + result = subprocess.run( [duckdb_cli_path, db_path, "-csv", "-c", query], capture_output=True, text=True, check=False, ) + # Filter out the secret creation output (Success, true lines) from stdout + if with_secrets and result.stdout: + lines = result.stdout.split("\n") + # Remove lines that are just "Success" or "true" from secret creation + filtered_lines = [] + skip_count = 0 + for line in lines: + stripped = line.strip() + if skip_count > 0 and stripped in ("true", "false"): + skip_count -= 1 + continue + if stripped == "Success": + skip_count = 1 # Skip the next line (true/false) + continue + filtered_lines.append(line) + result = subprocess.CompletedProcess( + args=result.args, + returncode=result.returncode, + stdout="\n".join(filtered_lines), + stderr=result.stderr, + ) + + return result + + def get_image_data_for_provider(image_url, provider): """ Get image data in the appropriate format based on the provider. - OpenAI uses URLs directly, Ollama uses base64 encoding. + Now all providers support URLs directly - the C++ code handles + downloading and converting to base64 for providers that need it (Ollama). """ - if provider == "openai": - return image_url - elif provider == "ollama": - # Fetch the image and convert to base64 - try: - response = requests.get(image_url, timeout=10) - response.raise_for_status() - image_base64 = base64.b64encode(response.content).decode("utf-8") - return image_base64 - except Exception as e: - # Fallback to URL if fetching fails - print( - f"Warning: Failed to fetch image {image_url}: {e}. Using URL instead." 
- ) - return image_url - else: - return image_url + return image_url diff --git a/test/integration/src/integration/setup_test_db.py b/test/integration/src/integration/setup_test_db.py index 8f9913fe..5aa828ec 100644 --- a/test/integration/src/integration/setup_test_db.py +++ b/test/integration/src/integration/setup_test_db.py @@ -1,24 +1,16 @@ #!/usr/bin/env python3 -""" -Test Database Setup Script for FlockMTL Integration Tests - -This script creates and manages a persistent test database with pre-configured -models, prompts, and test data to reduce setup time during integration testing. -""" import os import subprocess from pathlib import Path from dotenv import load_dotenv -# Load environment variables load_dotenv() -# Configuration DUCKDB_CLI_PATH = os.getenv("DUCKDB_CLI_PATH", "duckdb") + def run_sql_command(db_path: str, sql_command: str, description: str = ""): - """Execute SQL command using DuckDB CLI.""" try: result = subprocess.run( [DUCKDB_CLI_PATH, db_path, "-c", sql_command], @@ -35,35 +27,6 @@ def run_sql_command(db_path: str, sql_command: str, description: str = ""): print(f" Error: {e.stderr}") return None -def create_base_test_secrets(db_path: str): - """Create basic test secrets for LLM functions.""" - secrets = { - "openai": (os.getenv("OPENAI_API_KEY")), - "ollama": (os.getenv("API_URL", "http://localhost:11434")) - } - - def create_openai_secret(secret_key): - return f"""CREATE PERSISTENT SECRET IF NOT EXISTS ( - TYPE OPENAI, - API_KEY '{secret_key}' - );""" - - def create_ollama_secret(secret_key): - return f"""CREATE PERSISTENT SECRET IF NOT EXISTS ( - TYPE OLLAMA, - API_URL '{secret_key}' - );""" - - print("Creating test secrets...") - for secret_name, secret_value in secrets.items(): - if secret_name == "openai": - sql = create_openai_secret(secret_value) - elif secret_name == "ollama": - sql = create_ollama_secret(secret_value) - else: - continue - run_sql_command(db_path, sql, f"Secret: {secret_name}") def setup_test_db(db_path): - - 
create_base_test_secrets(db_path) + pass From 58b827b8560f6c347fbd523a51d53dc8b21104be Mon Sep 17 00:00:00 2001 From: Anas Dorbani Date: Wed, 10 Dec 2025 21:44:29 -0500 Subject: [PATCH 43/59] Updated integration tests for aggregate LLM functions --- .../functions/aggregate/test_llm_first.py | 164 +++++++++++------- .../functions/aggregate/test_llm_last.py | 151 +++++++++------- .../functions/aggregate/test_llm_reduce.py | 157 +++++++++-------- .../functions/aggregate/test_llm_rerank.py | 149 +++++++++------- 4 files changed, 369 insertions(+), 252 deletions(-) diff --git a/test/integration/src/integration/tests/functions/aggregate/test_llm_first.py b/test/integration/src/integration/tests/functions/aggregate/test_llm_first.py index 3143f9dc..01fa1ea3 100644 --- a/test/integration/src/integration/tests/functions/aggregate/test_llm_first.py +++ b/test/integration/src/integration/tests/functions/aggregate/test_llm_first.py @@ -1,10 +1,27 @@ import pytest -from integration.conftest import run_cli, get_image_data_for_provider +import json +import csv +from io import StringIO +from integration.conftest import ( + run_cli, + get_image_data_for_provider, + get_audio_file_path, +) +# Expected keywords that should appear when audio is transcribed +# Audio content: "Flock transforms DuckDB into a hybrid database and a semantic AI engine" +AUDIO_EXPECTED_KEYWORDS = ["flock", "duckdb", "database", "semantic", "ai", "hybrid"] -@pytest.fixture(params=[("gpt-4o-mini", "openai"), ("llama3.2", "ollama")]) + +@pytest.fixture(params=[("gpt-4o-mini", "openai"), ("gemma3:1b", "ollama")]) def model_config(request): - """Fixture to test with different models.""" + """Fixture to test with different models for text-only tests.""" + return request.param + + +@pytest.fixture(params=[("gpt-4o-mini", "openai"), ("gemma3:4b", "ollama")]) +def model_config_image(request): + """Fixture to test with different models for image tests.""" return request.param @@ -17,7 +34,9 @@ def 
test_llm_first_basic_functionality(integration_setup, model_config): create_model_query = ( f"CREATE MODEL('{test_model_name}', '{model_name}', '{provider}');" ) - run_cli(duckdb_cli_path, db_path, create_model_query) + r = run_cli(duckdb_cli_path, db_path, create_model_query, with_secrets=False) + + assert r.returncode == 0, f"Query failed with error: {create_model_query} {r.stderr}" create_table_query = """ CREATE OR REPLACE TABLE candidates ( @@ -64,7 +83,7 @@ def test_llm_first_with_group_by(integration_setup, model_config): create_model_query = ( f"CREATE MODEL('{test_model_name}', '{model_name}', '{provider}');" ) - run_cli(duckdb_cli_path, db_path, create_model_query) + run_cli(duckdb_cli_path, db_path, create_model_query, with_secrets=False) create_table_query = """ CREATE OR REPLACE TABLE job_applications ( @@ -122,7 +141,7 @@ def test_llm_first_with_batch_processing(integration_setup, model_config): create_model_query = ( f"CREATE MODEL('{test_model_name}', '{model_name}', '{provider}');" ) - run_cli(duckdb_cli_path, db_path, create_model_query) + run_cli(duckdb_cli_path, db_path, create_model_query, with_secrets=False) create_table_query = """ CREATE OR REPLACE TABLE investment_options ( @@ -171,7 +190,7 @@ def test_llm_first_with_model_parameters(integration_setup, model_config): create_model_query = ( f"CREATE MODEL('{test_model_name}', '{model_name}', '{provider}');" ) - run_cli(duckdb_cli_path, db_path, create_model_query) + run_cli(duckdb_cli_path, db_path, create_model_query, with_secrets=False) create_table_query = """ CREATE OR REPLACE TABLE startup_pitches ( @@ -223,7 +242,7 @@ def test_llm_first_multiple_criteria(integration_setup, model_config): create_model_query = ( f"CREATE MODEL('{test_model_name}', '{model_name}', '{provider}');" ) - run_cli(duckdb_cli_path, db_path, create_model_query) + run_cli(duckdb_cli_path, db_path, create_model_query, with_secrets=False) create_table_query = """ CREATE OR REPLACE TABLE course_options ( @@ -273,7 
+292,7 @@ def test_llm_first_empty_table(integration_setup, model_config): create_model_query = ( f"CREATE MODEL('{test_model_name}', '{model_name}', '{provider}');" ) - run_cli(duckdb_cli_path, db_path, create_model_query) + run_cli(duckdb_cli_path, db_path, create_model_query, with_secrets=False) create_table_query = """ CREATE OR REPLACE TABLE empty_candidates ( @@ -345,7 +364,7 @@ def test_llm_first_error_handling_empty_prompt(integration_setup, model_config): create_model_query = ( f"CREATE MODEL('{test_model_name}', '{model_name}', '{provider}');" ) - run_cli(duckdb_cli_path, db_path, create_model_query) + run_cli(duckdb_cli_path, db_path, create_model_query, with_secrets=False) create_table_query = """ CREATE OR REPLACE TABLE test_data ( @@ -386,7 +405,7 @@ def test_llm_first_error_handling_missing_arguments(integration_setup, model_con create_model_query = ( f"CREATE MODEL('{test_model_name}', '{model_name}', '{provider}');" ) - run_cli(duckdb_cli_path, db_path, create_model_query) + run_cli(duckdb_cli_path, db_path, create_model_query, with_secrets=False) # Test with only 1 argument (should fail since llm_first requires 2) query = ( @@ -412,7 +431,7 @@ def test_llm_first_with_special_characters(integration_setup, model_config): create_model_query = ( f"CREATE MODEL('{test_model_name}', '{model_name}', '{provider}');" ) - run_cli(duckdb_cli_path, db_path, create_model_query) + run_cli(duckdb_cli_path, db_path, create_model_query, with_secrets=False) create_table_query = """ CREATE OR REPLACE TABLE international_universities ( @@ -461,7 +480,7 @@ def _test_llm_first_performance_large_dataset(integration_setup, model_config): create_model_query = ( f"CREATE MODEL('{test_model_name}', '{model_name}', '{provider}');" ) - run_cli(duckdb_cli_path, db_path, create_model_query) + run_cli(duckdb_cli_path, db_path, create_model_query, with_secrets=False) create_table_query = """ CREATE OR REPLACE TABLE large_candidate_pool AS @@ -498,16 +517,16 @@ def 
_test_llm_first_performance_large_dataset(integration_setup, model_config): assert "category" in result.stdout.lower() -def test_llm_first_with_image_integration(integration_setup, model_config): +def test_llm_first_with_image_integration(integration_setup, model_config_image): """Test llm_first with image data integration.""" duckdb_cli_path, db_path = integration_setup - model_name, provider = model_config + model_name, provider = model_config_image test_model_name = f"test-image-first-model_{model_name}" create_model_query = ( f"CREATE MODEL('{test_model_name}', '{model_name}', '{provider}');" ) - run_cli(duckdb_cli_path, db_path, create_model_query) + run_cli(duckdb_cli_path, db_path, create_model_query, with_secrets=False) create_table_query = """ CREATE OR REPLACE TABLE pet_images ( @@ -567,16 +586,16 @@ def test_llm_first_with_image_integration(integration_setup, model_config): assert len(result.stdout.strip().split("\n")) >= 2 -def test_llm_first_image_with_group_by(integration_setup, model_config): +def test_llm_first_image_with_group_by(integration_setup, model_config_image): """Test llm_first with images and GROUP BY clause.""" duckdb_cli_path, db_path = integration_setup - model_name, provider = model_config + model_name, provider = model_config_image test_model_name = f"test-image-group-first_{model_name}" create_model_query = ( f"CREATE MODEL('{test_model_name}', '{model_name}', '{provider}');" ) - run_cli(duckdb_cli_path, db_path, create_model_query) + run_cli(duckdb_cli_path, db_path, create_model_query, with_secrets=False) create_table_query = """ CREATE OR REPLACE TABLE artwork_images ( @@ -641,7 +660,9 @@ def test_llm_first_image_with_group_by(integration_setup, model_config): ) result = run_cli(duckdb_cli_path, db_path, query) - assert result.returncode == 0, f"Query failed with error: {result.stderr}" + assert result.returncode == 0, ( + f"Query failed with error: {result.stdout} {result.stderr}" + ) lines = result.stdout.strip().split("\n") 
assert len(lines) >= 4, ( f"Expected at least 4 lines (header + 3 styles), got {len(lines)}" @@ -649,16 +670,16 @@ def test_llm_first_image_with_group_by(integration_setup, model_config): assert "most_recent_artwork" in result.stdout.lower() -def test_llm_first_image_batch_processing(integration_setup, model_config): +def test_llm_first_image_batch_processing(integration_setup, model_config_image): """Test llm_first with multiple images in batch processing.""" duckdb_cli_path, db_path = integration_setup - model_name, provider = model_config + model_name, provider = model_config_image test_model_name = f"test-image-batch-first_{model_name}" create_model_query = ( f"CREATE MODEL('{test_model_name}', '{model_name}', '{provider}');" ) - run_cli(duckdb_cli_path, db_path, create_model_query) + run_cli(duckdb_cli_path, db_path, create_model_query, with_secrets=False) create_table_query = """ CREATE OR REPLACE TABLE building_images ( @@ -698,15 +719,15 @@ def test_llm_first_image_batch_processing(integration_setup, model_config): 200, 2020), (4, 'Corporate Center', '{corporate_image}', 'Miami', 180, - 2019); \ + 2019); """ run_cli(duckdb_cli_path, db_path, insert_data_query) query = ( """ - SELECT city, - llm_first( - {'model_name': '""" + SELECT city, + llm_first( + {'model_name': '""" + test_model_name + """', 'batch_size': 2}, { @@ -720,7 +741,7 @@ def test_llm_first_image_batch_processing(integration_setup, model_config): ) AS tallest_building FROM building_images GROUP BY city - ORDER BY city; \ + ORDER BY city; """ ) result = run_cli(duckdb_cli_path, db_path, query) @@ -733,44 +754,40 @@ def test_llm_first_image_batch_processing(integration_setup, model_config): assert "tallest_building" in result.stdout.lower() -@pytest.fixture(params=[("gpt-4o-mini", "openai"), ("gpt-4o-transcribe", "openai")]) -def transcription_model_config(request): - """Fixture to test with transcription-capable models (OpenAI/Azure only).""" - return request.param - - -def 
test_llm_first_with_audio_transcription(integration_setup, transcription_model_config): +def test_llm_first_with_audio_transcription(integration_setup, model_config): """Test llm_first with audio transcription using OpenAI.""" duckdb_cli_path, db_path = integration_setup - model_name, provider = transcription_model_config + model_name, provider = model_config if provider != "openai": pytest.skip("Audio transcription is only supported for OpenAI provider") test_model_name = f"test-audio-first_{model_name}" create_model_query = f"CREATE MODEL('{test_model_name}', 'gpt-4o-mini', 'openai');" - run_cli(duckdb_cli_path, db_path, create_model_query) + run_cli(duckdb_cli_path, db_path, create_model_query, with_secrets=False) transcription_model_name = f"test-transcription-first_{model_name}" - create_transcription_model_query = ( - f"CREATE MODEL('{transcription_model_name}', '{model_name}', 'openai');" - ) - run_cli(duckdb_cli_path, db_path, create_transcription_model_query) + create_transcription_model_query = f"CREATE MODEL('{transcription_model_name}', 'gpt-4o-mini-transcribe', 'openai');" + run_cli(duckdb_cli_path, db_path, create_transcription_model_query, with_secrets=False) + + # Get audio file path + audio_path = get_audio_file_path() create_table_query = """ - CREATE OR REPLACE TABLE audio_candidates ( + CREATE OR REPLACE TABLE audio_descriptions ( id INTEGER, - audio_url VARCHAR, - name VARCHAR + audio_path VARCHAR, + topic VARCHAR ); """ run_cli(duckdb_cli_path, db_path, create_table_query) - insert_data_query = """ - INSERT INTO audio_candidates + # Both rows have the same audio about Flock/DuckDB + insert_data_query = f""" + INSERT INTO audio_descriptions VALUES - (1, 'https://download.samplelib.com/mp3/sample-9s.mp3', 'Alice'), - (2, 'https://download.samplelib.com/mp3/sample-9s.mp3', 'Bob'); + (0, '{audio_path}', 'Database'), + (1, '{audio_path}', 'AI'); """ run_cli(duckdb_cli_path, db_path, insert_data_query) @@ -781,31 +798,42 @@ def 
test_llm_first_with_audio_transcription(integration_setup, transcription_mod + test_model_name + """'}, { - 'prompt': 'Which candidate has the best audio interview? Return the ID number only.', + 'prompt': 'Based on the audio content, which description best relates to database technology? Return the ID number (0 or 1) only.', 'context_columns': [ { - 'data': audio_url, + 'data': audio_path, 'type': 'audio', 'transcription_model': '""" + transcription_model_name + """' - } + }, + {'data': topic} ] } - ) AS selected_candidate - FROM audio_candidates; + ) AS selected_id + FROM audio_descriptions; """ ) result = run_cli(duckdb_cli_path, db_path, query) - if result.returncode != 0: - assert ( - "transcription" in result.stderr.lower() - or "audio" in result.stderr.lower() - or "error" in result.stderr.lower() - ) - else: - assert "selected_candidate" in result.stdout.lower() + assert result.returncode == 0, f"Query failed with error: {result.stderr}" + assert "selected_id" in result.stdout.lower() + + # Parse the JSON output to verify the returned tuple + lines = result.stdout.strip().split("\n") + assert len(lines) >= 2, "Expected at least header and one result row" + + # Parse CSV output to get the JSON result + reader = csv.DictReader(StringIO(result.stdout)) + row = next(reader, None) + assert row is not None and "selected_id" in row + + # Parse the JSON result which contains the tuple data + result_json = json.loads(row["selected_id"]) + assert isinstance(result_json, list), ( + f"Expected list of tuples, got: {type(result_json)}" + ) + assert len(result_json) > 0, "Expected at least one tuple in result" def test_llm_first_audio_ollama_error(integration_setup): @@ -813,14 +841,16 @@ def test_llm_first_audio_ollama_error(integration_setup): duckdb_cli_path, db_path = integration_setup test_model_name = "test-ollama-first-audio" - create_model_query = "CREATE MODEL('test-ollama-first-audio', 'llama3.2', 'ollama');" - run_cli(duckdb_cli_path, db_path, 
create_model_query) + create_model_query = ( + "CREATE MODEL('test-ollama-first-audio', 'gemma3:1b', 'ollama');" + ) + run_cli(duckdb_cli_path, db_path, create_model_query, with_secrets=False) transcription_model_name = "test-ollama-first-transcription" create_transcription_model_query = ( - "CREATE MODEL('test-ollama-first-transcription', 'llama3.2', 'ollama');" + "CREATE MODEL('test-ollama-first-transcription', 'gemma3:1b', 'ollama');" ) - run_cli(duckdb_cli_path, db_path, create_transcription_model_query) + run_cli(duckdb_cli_path, db_path, create_transcription_model_query, with_secrets=False) create_table_query = """ CREATE OR REPLACE TABLE test_audio ( @@ -831,7 +861,9 @@ def test_llm_first_audio_ollama_error(integration_setup): run_cli(duckdb_cli_path, db_path, create_table_query) insert_data_query = """ - INSERT INTO test_audio VALUES (1, 'https://example.com/audio.mp3'); + INSERT INTO test_audio VALUES + (1, 'https://example.com/audio1.mp3'), + (2, 'https://example.com/audio2.mp3'); """ run_cli(duckdb_cli_path, db_path, insert_data_query) diff --git a/test/integration/src/integration/tests/functions/aggregate/test_llm_last.py b/test/integration/src/integration/tests/functions/aggregate/test_llm_last.py index 0ce9a4a3..8485f6a0 100644 --- a/test/integration/src/integration/tests/functions/aggregate/test_llm_last.py +++ b/test/integration/src/integration/tests/functions/aggregate/test_llm_last.py @@ -1,10 +1,27 @@ import pytest -from integration.conftest import run_cli, get_image_data_for_provider +import json +import csv +from io import StringIO +from integration.conftest import ( + run_cli, + get_image_data_for_provider, + get_audio_file_path, +) +# Expected keywords that should appear when audio is transcribed +# Audio content: "Flock transforms DuckDB into a hybrid database and a semantic AI engine" +AUDIO_EXPECTED_KEYWORDS = ["flock", "duckdb", "database", "semantic", "ai", "hybrid"] -@pytest.fixture(params=[("gpt-4o-mini", "openai"), ("llama3.2", 
"ollama")]) + +@pytest.fixture(params=[("gpt-4o-mini", "openai"), ("gemma3:1b", "ollama")]) def model_config(request): - """Fixture to test with different models.""" + """Fixture to test with different models for text-only tests.""" + return request.param + + +@pytest.fixture(params=[("gpt-4o-mini", "openai"), ("gemma3:4b", "ollama")]) +def model_config_image(request): + """Fixture to test with different models for image tests.""" return request.param @@ -17,7 +34,7 @@ def test_llm_last_basic_functionality(integration_setup, model_config): create_model_query = ( f"CREATE MODEL('{test_model_name}', '{model_name}', '{provider}');" ) - run_cli(duckdb_cli_path, db_path, create_model_query) + run_cli(duckdb_cli_path, db_path, create_model_query, with_secrets=False) create_table_query = """ CREATE OR REPLACE TABLE products ( @@ -65,7 +82,7 @@ def test_llm_last_with_group_by(integration_setup, model_config): create_model_query = ( f"CREATE MODEL('{test_model_name}', '{model_name}', '{provider}');" ) - run_cli(duckdb_cli_path, db_path, create_model_query) + run_cli(duckdb_cli_path, db_path, create_model_query, with_secrets=False) create_table_query = """ CREATE OR REPLACE TABLE restaurant_reviews ( @@ -121,7 +138,7 @@ def test_llm_last_with_batch_processing(integration_setup, model_config): create_model_query = ( f"CREATE MODEL('{test_model_name}', '{model_name}', '{provider}');" ) - run_cli(duckdb_cli_path, db_path, create_model_query) + run_cli(duckdb_cli_path, db_path, create_model_query, with_secrets=False) create_table_query = """ CREATE OR REPLACE TABLE service_providers ( @@ -172,7 +189,7 @@ def test_llm_last_with_model_parameters(integration_setup, model_config): create_model_query = ( f"CREATE MODEL('{test_model_name}', '{model_name}', '{provider}');" ) - run_cli(duckdb_cli_path, db_path, create_model_query) + run_cli(duckdb_cli_path, db_path, create_model_query, with_secrets=False) create_table_query = """ CREATE OR REPLACE TABLE movie_reviews ( @@ -224,7 +241,7 
@@ def test_llm_last_multiple_criteria(integration_setup, model_config): create_model_query = ( f"CREATE MODEL('{test_model_name}', '{model_name}', '{provider}');" ) - run_cli(duckdb_cli_path, db_path, create_model_query) + run_cli(duckdb_cli_path, db_path, create_model_query, with_secrets=False) create_table_query = """ CREATE OR REPLACE TABLE housing_options ( @@ -275,7 +292,7 @@ def test_llm_last_empty_table(integration_setup, model_config): create_model_query = ( f"CREATE MODEL('{test_model_name}', '{model_name}', '{provider}');" ) - run_cli(duckdb_cli_path, db_path, create_model_query) + run_cli(duckdb_cli_path, db_path, create_model_query, with_secrets=False) create_table_query = """ CREATE OR REPLACE TABLE empty_products ( @@ -347,7 +364,7 @@ def test_llm_last_error_handling_empty_prompt(integration_setup, model_config): create_model_query = ( f"CREATE MODEL('{test_model_name}', '{model_name}', '{provider}');" ) - run_cli(duckdb_cli_path, db_path, create_model_query) + run_cli(duckdb_cli_path, db_path, create_model_query, with_secrets=False) create_table_query = """ CREATE OR REPLACE TABLE test_data ( @@ -388,7 +405,7 @@ def test_llm_last_error_handling_missing_arguments(integration_setup, model_conf create_model_query = ( f"CREATE MODEL('{test_model_name}', '{model_name}', '{provider}');" ) - run_cli(duckdb_cli_path, db_path, create_model_query) + run_cli(duckdb_cli_path, db_path, create_model_query, with_secrets=False) # Test with only 1 argument (should fail since llm_last requires 2) query = ( @@ -414,7 +431,7 @@ def test_llm_last_with_special_characters(integration_setup, model_config): create_model_query = ( f"CREATE MODEL('{test_model_name}', '{model_name}', '{provider}');" ) - run_cli(duckdb_cli_path, db_path, create_model_query) + run_cli(duckdb_cli_path, db_path, create_model_query, with_secrets=False) create_table_query = """ CREATE OR REPLACE TABLE travel_destinations ( @@ -462,7 +479,7 @@ def 
_test_llm_last_performance_large_dataset(integration_setup, model_config): create_model_query = ( f"CREATE MODEL('{test_model_name}', '{model_name}', '{provider}');" ) - run_cli(duckdb_cli_path, db_path, create_model_query) + run_cli(duckdb_cli_path, db_path, create_model_query, with_secrets=False) create_table_query = """ CREATE OR REPLACE TABLE large_product_pool AS @@ -499,15 +516,15 @@ def _test_llm_last_performance_large_dataset(integration_setup, model_config): assert "category" in result.stdout.lower() -def test_llm_last_with_image_integration(integration_setup, model_config): +def test_llm_last_with_image_integration(integration_setup, model_config_image): """Test llm_last with image data integration.""" duckdb_cli_path, db_path = integration_setup - model_name, provider = model_config + model_name, provider = model_config_image create_model_query = ( f"CREATE MODEL('test-image-last-model', '{model_name}', '{provider}');" ) - run_cli(duckdb_cli_path, db_path, create_model_query) + run_cli(duckdb_cli_path, db_path, create_model_query, with_secrets=False) create_table_query = """ CREATE OR REPLACE TABLE flower_images ( @@ -564,15 +581,15 @@ def test_llm_last_with_image_integration(integration_setup, model_config): assert len(result.stdout.strip().split("\n")) >= 2 -def test_llm_last_image_with_group_by(integration_setup, model_config): +def test_llm_last_image_with_group_by(integration_setup, model_config_image): """Test llm_last with images and GROUP BY clause.""" duckdb_cli_path, db_path = integration_setup - model_name, provider = model_config + model_name, provider = model_config_image create_model_query = ( f"CREATE MODEL('test-image-group-last', '{model_name}', '{provider}');" ) - run_cli(duckdb_cli_path, db_path, create_model_query) + run_cli(duckdb_cli_path, db_path, create_model_query, with_secrets=False) create_table_query = """ CREATE OR REPLACE TABLE car_images ( @@ -639,15 +656,15 @@ def test_llm_last_image_with_group_by(integration_setup, 
model_config): assert "oldest_car" in result.stdout.lower() -def test_llm_last_image_batch_processing(integration_setup, model_config): +def test_llm_last_image_batch_processing(integration_setup, model_config_image): """Test llm_last with multiple images in batch processing.""" duckdb_cli_path, db_path = integration_setup - model_name, provider = model_config + model_name, provider = model_config_image create_model_query = ( f"CREATE MODEL('test-image-batch-last', '{model_name}', '{provider}');" ) - run_cli(duckdb_cli_path, db_path, create_model_query) + run_cli(duckdb_cli_path, db_path, create_model_query, with_secrets=False) create_table_query = """ CREATE OR REPLACE TABLE restaurant_images ( @@ -715,47 +732,49 @@ def test_llm_last_image_batch_processing(integration_setup, model_config): assert "lowest_rated_restaurant" in result.stdout.lower() -@pytest.fixture(params=[("gpt-4o-mini", "openai"), ("gpt-4o-transcribe", "openai")]) -def transcription_model_config(request): - """Fixture to test with transcription-capable models (OpenAI/Azure only).""" - return request.param - +def test_llm_last_with_audio_transcription(integration_setup, model_config): + """Test llm_last with audio transcription using OpenAI. -def test_llm_last_with_audio_transcription(integration_setup, transcription_model_config): - """Test llm_last with audio transcription using OpenAI.""" + The audio content says: 'Flock transforms DuckDB into a hybrid database and a semantic AI engine' + This test verifies that the audio is correctly transcribed and the LLM can reason about the content. 
+ """ duckdb_cli_path, db_path = integration_setup - model_name, provider = transcription_model_config + model_name, provider = model_config if provider != "openai": pytest.skip("Audio transcription is only supported for OpenAI provider") test_model_name = f"test-audio-last_{model_name}" create_model_query = f"CREATE MODEL('{test_model_name}', 'gpt-4o-mini', 'openai');" - run_cli(duckdb_cli_path, db_path, create_model_query) + run_cli(duckdb_cli_path, db_path, create_model_query, with_secrets=False) transcription_model_name = f"test-transcription-last_{model_name}" - create_transcription_model_query = ( - f"CREATE MODEL('{transcription_model_name}', '{model_name}', 'openai');" - ) - run_cli(duckdb_cli_path, db_path, create_transcription_model_query) + create_transcription_model_query = f"CREATE MODEL('{transcription_model_name}', 'gpt-4o-mini-transcribe', 'openai');" + run_cli(duckdb_cli_path, db_path, create_transcription_model_query, with_secrets=False) + + # Get audio file path + audio_path = get_audio_file_path() + # Create table with topics - one about Flock/DuckDB (audio content), one unrelated create_table_query = """ - CREATE OR REPLACE TABLE audio_candidates ( + CREATE OR REPLACE TABLE audio_topics ( id INTEGER, - audio_url VARCHAR, - name VARCHAR + topic VARCHAR, + audio_path VARCHAR ); """ run_cli(duckdb_cli_path, db_path, create_table_query) - insert_data_query = """ - INSERT INTO audio_candidates + # Both rows reference the same Flock audio file; only the topic label differs + insert_data_query = f""" + INSERT INTO audio_topics VALUES - (1, 'https://download.samplelib.com/mp3/sample-9s.mp3', 'Alice'), - (2, 'https://download.samplelib.com/mp3/sample-9s.mp3', 'Bob'); + (1, 'Weather Forecast', '{audio_path}'), + (2, 'Database Technology', '{audio_path}'); """ run_cli(duckdb_cli_path, db_path, insert_data_query) + # Ask which topic is about databases based on the audio query = ( """ SELECT llm_last( @@ -763,10 +782,11 @@ def
test_llm_last_with_audio_transcription(integration_setup, transcription_mode + test_model_name + """'}, { - 'prompt': 'Which candidate has the worst audio interview? Return the ID number only.', + 'prompt': 'Based on the topic and audio content (if available), which entry is about databases or Flock? Return the topic name.', 'context_columns': [ + {'data': topic, 'type': 'text'}, { - 'data': audio_url, + 'data': audio_path, 'type': 'audio', 'transcription_model': '""" + transcription_model_name @@ -774,20 +794,29 @@ def test_llm_last_with_audio_transcription(integration_setup, transcription_mode } ] } - ) AS selected_candidate - FROM audio_candidates; + ) AS selected_topic + FROM audio_topics; """ ) result = run_cli(duckdb_cli_path, db_path, query) - if result.returncode != 0: - assert ( - "transcription" in result.stderr.lower() - or "audio" in result.stderr.lower() - or "error" in result.stderr.lower() - ) - else: - assert "selected_candidate" in result.stdout.lower() + assert result.returncode == 0, f"Query failed with error: {result.stderr}" + + # Parse the JSON output to verify the returned tuple + lines = result.stdout.strip().split("\n") + assert len(lines) >= 2, "Expected at least header and one result row" + + # Parse CSV output to get the JSON result + reader = csv.DictReader(StringIO(result.stdout)) + row = next(reader, None) + assert row is not None and "selected_topic" in row + + # Parse the JSON result which contains the tuple data + result_json = json.loads(row["selected_topic"]) + assert isinstance(result_json, list), ( + f"Expected list of tuples, got: {type(result_json)}" + ) + assert len(result_json) > 0, "Expected at least one tuple in result" def test_llm_last_audio_ollama_error(integration_setup): @@ -795,14 +824,16 @@ def test_llm_last_audio_ollama_error(integration_setup): duckdb_cli_path, db_path = integration_setup test_model_name = "test-ollama-last-audio" - create_model_query = "CREATE MODEL('test-ollama-last-audio', 'llama3.2', 
'ollama');" - run_cli(duckdb_cli_path, db_path, create_model_query) + create_model_query = ( + "CREATE MODEL('test-ollama-last-audio', 'gemma3:1b', 'ollama');" + ) + run_cli(duckdb_cli_path, db_path, create_model_query, with_secrets=False) transcription_model_name = "test-ollama-last-transcription" create_transcription_model_query = ( - "CREATE MODEL('test-ollama-last-transcription', 'llama3.2', 'ollama');" + "CREATE MODEL('test-ollama-last-transcription', 'gemma3:1b', 'ollama');" ) - run_cli(duckdb_cli_path, db_path, create_transcription_model_query) + run_cli(duckdb_cli_path, db_path, create_transcription_model_query, with_secrets=False) create_table_query = """ CREATE OR REPLACE TABLE test_audio ( @@ -813,7 +844,9 @@ def test_llm_last_audio_ollama_error(integration_setup): run_cli(duckdb_cli_path, db_path, create_table_query) insert_data_query = """ - INSERT INTO test_audio VALUES (1, 'https://example.com/audio.mp3'); + INSERT INTO test_audio VALUES + (1, 'https://example.com/audio1.mp3'), + (2, 'https://example.com/audio2.mp3'); """ run_cli(duckdb_cli_path, db_path, insert_data_query) diff --git a/test/integration/src/integration/tests/functions/aggregate/test_llm_reduce.py b/test/integration/src/integration/tests/functions/aggregate/test_llm_reduce.py index 0dd670c6..e72fe199 100644 --- a/test/integration/src/integration/tests/functions/aggregate/test_llm_reduce.py +++ b/test/integration/src/integration/tests/functions/aggregate/test_llm_reduce.py @@ -1,10 +1,24 @@ import pytest -from integration.conftest import run_cli, get_image_data_for_provider +from integration.conftest import ( + run_cli, + get_image_data_for_provider, + get_audio_file_path, +) +# Expected keywords that should appear when audio is transcribed +# Audio content: "Flock transforms DuckDB into a hybrid database and a semantic AI engine" +AUDIO_EXPECTED_KEYWORDS = ["flock", "duckdb", "database", "semantic", "ai", "hybrid"] -@pytest.fixture(params=[("gpt-4o-mini", "openai"), ("llama3.2", 
"ollama")]) + +@pytest.fixture(params=[("gpt-4o-mini", "openai"), ("gemma3:1b", "ollama")]) def model_config(request): - """Fixture to test with different models.""" + """Fixture to test with different models for text-only tests.""" + return request.param + + +@pytest.fixture(params=[("gpt-4o-mini", "openai"), ("gemma3:4b", "ollama")]) +def model_config_image(request): + """Fixture to test with different models for image tests.""" return request.param @@ -17,7 +31,7 @@ def test_llm_reduce_basic_functionality(integration_setup, model_config): create_model_query = ( f"CREATE MODEL('{test_model_name}', '{model_name}', '{provider}');" ) - run_cli(duckdb_cli_path, db_path, create_model_query) + run_cli(duckdb_cli_path, db_path, create_model_query, with_secrets=False) create_table_query = """ CREATE OR REPLACE TABLE products ( @@ -44,7 +58,7 @@ def test_llm_reduce_basic_functionality(integration_setup, model_config): {'model_name': '""" + test_model_name + """'}, - {'prompt': 'Summarize the following product descriptions into a single comprehensive summary', 'context_columns': [{'data': description}]} + {'prompt': 'Summarize these products in exactly 5 words', 'context_columns': [{'data': description}]} ) AS product_summary FROM products; \ """ @@ -67,7 +81,7 @@ def test_llm_reduce_with_group_by(integration_setup, model_config): create_model_query = ( f"CREATE MODEL('{test_model_name}', '{model_name}', '{provider}');" ) - run_cli(duckdb_cli_path, db_path, create_model_query) + run_cli(duckdb_cli_path, db_path, create_model_query, with_secrets=False) create_table_query = """ CREATE OR REPLACE TABLE product_reviews ( @@ -98,7 +112,7 @@ def test_llm_reduce_with_group_by(integration_setup, model_config): {'model_name': '""" + test_model_name + """'}, - {'prompt': 'Create a brief summary of these product reviews', 'context_columns': [{'data': review_text}]} + {'prompt': 'Summarize in 3 words', 'context_columns': [{'data': review_text}]} ) AS category_summary FROM 
product_reviews GROUP BY product_category @@ -127,7 +141,7 @@ def test_llm_reduce_multiple_columns(integration_setup, model_config): create_model_query = ( f"CREATE MODEL('{test_model_name}', '{model_name}', '{provider}');" ) - run_cli(duckdb_cli_path, db_path, create_model_query) + run_cli(duckdb_cli_path, db_path, create_model_query, with_secrets=False) create_table_query = """ CREATE OR REPLACE TABLE employee_feedback ( @@ -155,7 +169,7 @@ def test_llm_reduce_multiple_columns(integration_setup, model_config): {'model_name': '""" + test_model_name + """'}, - {'prompt': 'Summarize the team feedback and overall performance', 'context_columns': [{'data': employee_name}, {'data': feedback}, {'data': rating::VARCHAR}]} + {'prompt': 'Rate team in one word', 'context_columns': [{'data': employee_name}, {'data': feedback}, {'data': rating::VARCHAR}]} ) AS team_summary FROM employee_feedback GROUP BY department; \ @@ -177,7 +191,7 @@ def test_llm_reduce_with_batch_processing(integration_setup, model_config): create_model_query = ( f"CREATE MODEL('{test_model_name}', '{model_name}', '{provider}');" ) - run_cli(duckdb_cli_path, db_path, create_model_query) + run_cli(duckdb_cli_path, db_path, create_model_query, with_secrets=False) create_table_query = """ CREATE OR REPLACE TABLE articles ( @@ -204,7 +218,7 @@ def test_llm_reduce_with_batch_processing(integration_setup, model_config): {'model_name': '""" + test_model_name + """', 'batch_size': 2}, - {'prompt': 'Create a comprehensive summary of these articles', 'context_columns': [{'data': title}, {'data': content}]} + {'prompt': 'List topics in 5 words max', 'context_columns': [{'data': title}, {'data': content}]} ) AS articles_summary FROM articles; \ """ @@ -226,7 +240,7 @@ def test_llm_reduce_with_model_parameters(integration_setup, model_config): create_model_query = ( f"CREATE MODEL('{test_model_name}', '{model_name}', '{provider}');" ) - run_cli(duckdb_cli_path, db_path, create_model_query) + run_cli(duckdb_cli_path, 
db_path, create_model_query, with_secrets=False) create_table_query = """ CREATE OR REPLACE TABLE news_items ( @@ -251,7 +265,7 @@ def test_llm_reduce_with_model_parameters(integration_setup, model_config): + test_model_name + """', 'tuple_format': 'Markdown', 'model_parameters': '{"temperature": 0.1}'}, - {'prompt': 'Provide a concise summary of these news items', 'context_columns': [{'data': headline}, {'data': summary}]} + {'prompt': 'Summarize in 3 words', 'context_columns': [{'data': headline}, {'data': summary}]} ) AS news_summary FROM news_items; \ """ @@ -271,7 +285,7 @@ def test_llm_reduce_empty_table(integration_setup, model_config): create_model_query = ( f"CREATE MODEL('{test_model_name}', '{model_name}', '{provider}');" ) - run_cli(duckdb_cli_path, db_path, create_model_query) + run_cli(duckdb_cli_path, db_path, create_model_query, with_secrets=False) create_table_query = """ CREATE OR REPLACE TABLE empty_data ( @@ -343,7 +357,7 @@ def test_llm_reduce_error_handling_empty_prompt(integration_setup, model_config) create_model_query = ( f"CREATE MODEL('{test_model_name}', '{model_name}', '{provider}');" ) - run_cli(duckdb_cli_path, db_path, create_model_query) + run_cli(duckdb_cli_path, db_path, create_model_query, with_secrets=False) create_table_query = """ CREATE OR REPLACE TABLE test_data ( @@ -384,7 +398,7 @@ def test_llm_reduce_error_handling_missing_arguments(integration_setup, model_co create_model_query = ( f"CREATE MODEL('{test_model_name}', '{model_name}', '{provider}');" ) - run_cli(duckdb_cli_path, db_path, create_model_query) + run_cli(duckdb_cli_path, db_path, create_model_query, with_secrets=False) # Test with only 1 argument (should fail since llm_reduce requires 2) query = ( @@ -410,7 +424,7 @@ def test_llm_reduce_with_special_characters(integration_setup, model_config): create_model_query = ( f"CREATE MODEL('{test_model_name}', '{model_name}', '{provider}');" ) - run_cli(duckdb_cli_path, db_path, create_model_query) + 
run_cli(duckdb_cli_path, db_path, create_model_query, with_secrets=False) create_table_query = """ CREATE OR REPLACE TABLE international_content ( @@ -434,7 +448,7 @@ def test_llm_reduce_with_special_characters(integration_setup, model_config): {'model_name': '""" + test_model_name + """'}, - {'prompt': 'Summarize these international text samples', 'context_columns': [{'data': text}]} + {'prompt': 'Describe in 3 words', 'context_columns': [{'data': text}]} ) AS summary FROM international_content; \ """ @@ -454,7 +468,7 @@ def test_llm_reduce_with_structured_output(integration_setup, model_config): create_model_query = ( f"CREATE MODEL('{test_model_name}', '{model_name}', '{provider}');" ) - run_cli(duckdb_cli_path, db_path, create_model_query) + run_cli(duckdb_cli_path, db_path, create_model_query, with_secrets=False) create_table_query = """ CREATE OR REPLACE TABLE structured_data ( @@ -519,7 +533,7 @@ def _test_llm_reduce_performance_large_dataset(integration_setup, model_config): create_model_query = ( f"CREATE MODEL('{test_model_name}', '{model_name}', '{provider}');" ) - run_cli(duckdb_cli_path, db_path, create_model_query) + run_cli(duckdb_cli_path, db_path, create_model_query, with_secrets=False) create_table_query = """ CREATE OR REPLACE TABLE large_dataset AS @@ -538,7 +552,7 @@ def _test_llm_reduce_performance_large_dataset(integration_setup, model_config): {'model_name': '""" + test_model_name + """', 'batch_size': 10}, - {'prompt': 'Create a comprehensive summary of all items in this category', 'context_columns': [{'data': content}]} + {'prompt': 'Summarize in 3 words', 'context_columns': [{'data': content}]} ) AS category_summary FROM large_dataset GROUP BY category @@ -555,16 +569,16 @@ def _test_llm_reduce_performance_large_dataset(integration_setup, model_config): assert "category" in result.stdout.lower() -def test_llm_reduce_with_image_integration(integration_setup, model_config): +def test_llm_reduce_with_image_integration(integration_setup, 
model_config_image): """Test llm_reduce with image data integration.""" duckdb_cli_path, db_path = integration_setup - model_name, provider = model_config + model_name, provider = model_config_image test_model_name = f"test-image-reduce-model_{model_name}" create_model_query = ( f"CREATE MODEL('{test_model_name}', '{model_name}', '{provider}');" ) - run_cli(duckdb_cli_path, db_path, create_model_query) + run_cli(duckdb_cli_path, db_path, create_model_query, with_secrets=False) create_table_query = """ CREATE OR REPLACE TABLE animal_images ( @@ -605,7 +619,7 @@ def test_llm_reduce_with_image_integration(integration_setup, model_config): + test_model_name + """'}, { - 'prompt': 'Summarize the next data in json do not miss any data', + 'prompt': 'List animal names only', 'context_columns': [ {'data': name}, {'data': image, 'type': 'image'} @@ -623,16 +637,16 @@ def test_llm_reduce_with_image_integration(integration_setup, model_config): assert len(result.stdout.strip().split("\n")) >= 2 -def test_llm_reduce_image_with_group_by(integration_setup, model_config): +def test_llm_reduce_image_with_group_by(integration_setup, model_config_image): """Test llm_reduce with images and GROUP BY clause.""" duckdb_cli_path, db_path = integration_setup - model_name, provider = model_config + model_name, provider = model_config_image test_model_name = f"test-image-group-reduce_{model_name}" create_model_query = ( f"CREATE MODEL('{test_model_name}', '{model_name}', '{provider}');" ) - run_cli(duckdb_cli_path, db_path, create_model_query) + run_cli(duckdb_cli_path, db_path, create_model_query, with_secrets=False) create_table_query = """ CREATE OR REPLACE TABLE product_images ( @@ -685,7 +699,7 @@ def test_llm_reduce_image_with_group_by(integration_setup, model_config): + test_model_name + """'}, { - 'prompt': 'Analyze these product images in this category and provide a summary of their design characteristics and market positioning.', + 'prompt': 'List product names in 5 words max', 
'context_columns': [ {'data': product_name}, {'data': image_url, 'type': 'image'}, @@ -708,16 +722,16 @@ def test_llm_reduce_image_with_group_by(integration_setup, model_config): assert "category_analysis" in result.stdout.lower() -def test_llm_reduce_image_batch_processing(integration_setup, model_config): +def test_llm_reduce_image_batch_processing(integration_setup, model_config_image): """Test llm_reduce with multiple images in batch processing.""" duckdb_cli_path, db_path = integration_setup - model_name, provider = model_config + model_name, provider = model_config_image test_model_name = f"test-image-batch-reduce_{model_name}" create_model_query = ( f"CREATE MODEL('{test_model_name}', '{model_name}', '{provider}');" ) - run_cli(duckdb_cli_path, db_path, create_model_query) + run_cli(duckdb_cli_path, db_path, create_model_query, with_secrets=False) create_table_query = """ CREATE OR REPLACE TABLE landscape_photos ( @@ -768,7 +782,7 @@ def test_llm_reduce_image_batch_processing(integration_setup, model_config): + test_model_name + """', 'batch_size': 3}, { - 'prompt': 'Analyze these landscape photographs and create a comprehensive summary of the natural environments, weather conditions, and seasonal characteristics shown.', + 'prompt': 'List locations in 5 words max', 'context_columns': [ {'data': location}, {'data': image_url, 'type': 'image'}, @@ -787,44 +801,46 @@ def test_llm_reduce_image_batch_processing(integration_setup, model_config): assert len(result.stdout.strip().split("\n")) >= 2 -@pytest.fixture(params=[("gpt-4o-mini", "openai"), ("gpt-4o-transcribe", "openai")]) -def transcription_model_config(request): - """Fixture to test with transcription-capable models (OpenAI/Azure only).""" - return request.param - +def test_llm_reduce_with_audio_transcription(integration_setup, model_config): + """Test llm_reduce with audio transcription using OpenAI. 
-def test_llm_reduce_with_audio_transcription(integration_setup, transcription_model_config): - """Test llm_reduce with audio transcription using OpenAI.""" + The audio content says: 'Flock transforms DuckDB into a hybrid database and a semantic AI engine' + This test verifies that the audio is correctly transcribed and reduced into a summary. + """ duckdb_cli_path, db_path = integration_setup - model_name, provider = transcription_model_config + model_name, provider = model_config if provider != "openai": pytest.skip("Audio transcription is only supported for OpenAI provider") test_model_name = f"test-audio-reduce_{model_name}" create_model_query = f"CREATE MODEL('{test_model_name}', 'gpt-4o-mini', 'openai');" - run_cli(duckdb_cli_path, db_path, create_model_query) + run_cli(duckdb_cli_path, db_path, create_model_query, with_secrets=False) transcription_model_name = f"test-transcription-reduce_{model_name}" - create_transcription_model_query = ( - f"CREATE MODEL('{transcription_model_name}', '{model_name}', 'openai');" + create_transcription_model_query = f"CREATE MODEL('{transcription_model_name}', 'gpt-4o-mini-transcribe', 'openai');" + run_cli( + duckdb_cli_path, db_path, create_transcription_model_query, with_secrets=False ) - run_cli(duckdb_cli_path, db_path, create_transcription_model_query) + # Get audio file path + audio_path = get_audio_file_path() + + # Create table with different topics and the same Flock audio create_table_query = """ - CREATE OR REPLACE TABLE audio_reviews ( + CREATE OR REPLACE TABLE audio_content ( id INTEGER, - audio_url VARCHAR, - product_name VARCHAR + topic VARCHAR, + audio_path VARCHAR ); """ run_cli(duckdb_cli_path, db_path, create_table_query) - insert_data_query = """ - INSERT INTO audio_reviews + insert_data_query = f""" + INSERT INTO audio_content VALUES - (1, 'https://download.samplelib.com/mp3/sample-9s.mp3', 'Headphones'), - (2, 'https://download.samplelib.com/mp3/sample-9s.mp3', 'Speaker'); + (1, 'Technology Overview', 
'{audio_path}'), + (2, 'Product Demo', '{audio_path}'); """ run_cli(duckdb_cli_path, db_path, insert_data_query) @@ -835,10 +851,11 @@ def test_llm_reduce_with_audio_transcription(integration_setup, transcription_mo + test_model_name + """'}, { - 'prompt': 'Summarize the key points from these audio reviews', + 'prompt': 'What product is discussed? Answer in 5 words max.', 'context_columns': [ + {'data': topic, 'type': 'text'}, { - 'data': audio_url, + 'data': audio_path, 'type': 'audio', 'transcription_model': '""" + transcription_model_name @@ -847,19 +864,17 @@ def test_llm_reduce_with_audio_transcription(integration_setup, transcription_mo ] } ) AS audio_summary - FROM audio_reviews; + FROM audio_content; """ ) result = run_cli(duckdb_cli_path, db_path, query) - if result.returncode != 0: - assert ( - "transcription" in result.stderr.lower() - or "audio" in result.stderr.lower() - or "error" in result.stderr.lower() - ) - else: - assert "audio_summary" in result.stdout.lower() + assert result.returncode == 0, f"Query failed with error: {result.stderr}" + # The summary should mention Flock, DuckDB, database, or related terms from the audio + result_lower = result.stdout.lower() + assert any(kw in result_lower for kw in AUDIO_EXPECTED_KEYWORDS), ( + f"Expected summary to contain keywords from audio content {AUDIO_EXPECTED_KEYWORDS}. 
Got: {result.stdout}" + ) def test_llm_reduce_audio_ollama_error(integration_setup): @@ -867,14 +882,18 @@ def test_llm_reduce_audio_ollama_error(integration_setup): duckdb_cli_path, db_path = integration_setup test_model_name = "test-ollama-reduce-audio" - create_model_query = "CREATE MODEL('test-ollama-reduce-audio', 'llama3.2', 'ollama');" - run_cli(duckdb_cli_path, db_path, create_model_query) + create_model_query = ( + "CREATE MODEL('test-ollama-reduce-audio', 'gemma3:1b', 'ollama');" + ) + run_cli(duckdb_cli_path, db_path, create_model_query, with_secrets=False) transcription_model_name = "test-ollama-reduce-transcription" create_transcription_model_query = ( - "CREATE MODEL('test-ollama-reduce-transcription', 'llama3.2', 'ollama');" + "CREATE MODEL('test-ollama-reduce-transcription', 'gemma3:1b', 'ollama');" + ) + run_cli( + duckdb_cli_path, db_path, create_transcription_model_query, with_secrets=False ) - run_cli(duckdb_cli_path, db_path, create_transcription_model_query) create_table_query = """ CREATE OR REPLACE TABLE test_audio ( @@ -885,7 +904,9 @@ def test_llm_reduce_audio_ollama_error(integration_setup): run_cli(duckdb_cli_path, db_path, create_table_query) insert_data_query = """ - INSERT INTO test_audio VALUES (1, 'https://example.com/audio.mp3'); + INSERT INTO test_audio VALUES + (1, 'https://example.com/audio1.mp3'), + (2, 'https://example.com/audio2.mp3'); """ run_cli(duckdb_cli_path, db_path, insert_data_query) diff --git a/test/integration/src/integration/tests/functions/aggregate/test_llm_rerank.py b/test/integration/src/integration/tests/functions/aggregate/test_llm_rerank.py index 537837b2..6aec9a99 100644 --- a/test/integration/src/integration/tests/functions/aggregate/test_llm_rerank.py +++ b/test/integration/src/integration/tests/functions/aggregate/test_llm_rerank.py @@ -1,10 +1,27 @@ import pytest -from integration.conftest import run_cli, get_image_data_for_provider +import json +import csv +from io import StringIO +from 
integration.conftest import ( + run_cli, + get_image_data_for_provider, + get_audio_file_path, +) +# Expected keywords that should appear when audio is transcribed +# Audio content: "Flock transforms DuckDB into a hybrid database and a semantic AI engine" +AUDIO_EXPECTED_KEYWORDS = ["flock", "duckdb", "database", "semantic", "ai", "hybrid"] -@pytest.fixture(params=[("gpt-4o-mini", "openai"), ("llama3.2", "ollama")]) + +@pytest.fixture(params=[("gpt-4o-mini", "openai"), ("gemma3:1b", "ollama")]) def model_config(request): - """Fixture to test with different models.""" + """Fixture to test with different models for text-only tests.""" + return request.param + + +@pytest.fixture(params=[("gpt-4o-mini", "openai"), ("gemma3:4b", "ollama")]) +def model_config_image(request): + """Fixture to test with different models for image tests.""" return request.param @@ -17,7 +34,7 @@ def test_llm_rerank_basic_functionality(integration_setup, model_config): create_model_query = ( f"CREATE MODEL('{test_model_name}', '{model_name}', '{provider}');" ) - run_cli(duckdb_cli_path, db_path, create_model_query) + run_cli(duckdb_cli_path, db_path, create_model_query, with_secrets=False) create_table_query = """ CREATE OR REPLACE TABLE search_results ( @@ -67,7 +84,7 @@ def test_llm_rerank_with_group_by(integration_setup, model_config): create_model_query = ( f"CREATE MODEL('{test_model_name}', '{model_name}', '{provider}');" ) - run_cli(duckdb_cli_path, db_path, create_model_query) + run_cli(duckdb_cli_path, db_path, create_model_query, with_secrets=False) create_table_query = """ CREATE OR REPLACE TABLE product_listings ( @@ -129,7 +146,7 @@ def test_llm_rerank_with_batch_processing(integration_setup, model_config): create_model_query = ( f"CREATE MODEL('{test_model_name}', '{model_name}', '{provider}');" ) - run_cli(duckdb_cli_path, db_path, create_model_query) + run_cli(duckdb_cli_path, db_path, create_model_query, with_secrets=False) create_table_query = """ CREATE OR REPLACE TABLE 
job_candidates ( @@ -182,7 +199,7 @@ def test_llm_rerank_with_model_parameters(integration_setup, model_config): create_model_query = ( f"CREATE MODEL('{test_model_name}', '{model_name}', '{provider}');" ) - run_cli(duckdb_cli_path, db_path, create_model_query) + run_cli(duckdb_cli_path, db_path, create_model_query, with_secrets=False) create_table_query = """ CREATE OR REPLACE TABLE restaurant_options ( @@ -232,7 +249,7 @@ def test_llm_rerank_multiple_criteria(integration_setup, model_config): create_model_query = ( f"CREATE MODEL('{test_model_name}', '{model_name}', '{provider}');" ) - run_cli(duckdb_cli_path, db_path, create_model_query) + run_cli(duckdb_cli_path, db_path, create_model_query, with_secrets=False) create_table_query = """ CREATE OR REPLACE TABLE investment_funds ( @@ -282,7 +299,7 @@ def test_llm_rerank_empty_table(integration_setup, model_config): create_model_query = ( f"CREATE MODEL('{test_model_name}', '{model_name}', '{provider}');" ) - run_cli(duckdb_cli_path, db_path, create_model_query) + run_cli(duckdb_cli_path, db_path, create_model_query, with_secrets=False) create_table_query = """ CREATE OR REPLACE TABLE empty_items ( @@ -356,7 +373,7 @@ def test_llm_rerank_error_handling_empty_prompt(integration_setup, model_config) create_model_query = ( f"CREATE MODEL('{test_model_name}', '{model_name}', '{provider}');" ) - run_cli(duckdb_cli_path, db_path, create_model_query) + run_cli(duckdb_cli_path, db_path, create_model_query, with_secrets=False) create_table_query = """ CREATE OR REPLACE TABLE test_data ( @@ -397,7 +414,7 @@ def test_llm_rerank_error_handling_missing_arguments(integration_setup, model_co create_model_query = ( f"CREATE MODEL('{test_model_name}', '{model_name}', '{provider}');" ) - run_cli(duckdb_cli_path, db_path, create_model_query) + run_cli(duckdb_cli_path, db_path, create_model_query, with_secrets=False) # Test with only 2 arguments (should fail since llm_rerank requires 3) query = ( @@ -424,7 +441,7 @@ def 
test_llm_rerank_with_special_characters(integration_setup, model_config): create_model_query = ( f"CREATE MODEL('{test_model_name}', '{model_name}', '{provider}');" ) - run_cli(duckdb_cli_path, db_path, create_model_query) + run_cli(duckdb_cli_path, db_path, create_model_query, with_secrets=False) create_table_query = """ CREATE OR REPLACE TABLE international_dishes ( @@ -470,7 +487,7 @@ def _test_llm_rerank_performance_large_dataset(integration_setup, model_config): create_model_query = ( f"CREATE MODEL('{test_model_name}', '{model_name}', '{provider}');" ) - run_cli(duckdb_cli_path, db_path, create_model_query) + run_cli(duckdb_cli_path, db_path, create_model_query, with_secrets=False) create_table_query = """ CREATE OR REPLACE TABLE large_search_results AS @@ -508,16 +525,16 @@ def _test_llm_rerank_performance_large_dataset(integration_setup, model_config): assert "category" in result.stdout.lower() -def test_llm_rerank_with_image_integration(integration_setup, model_config): +def test_llm_rerank_with_image_integration(integration_setup, model_config_image): """Test llm_rerank with image data integration.""" duckdb_cli_path, db_path = integration_setup - model_name, provider = model_config + model_name, provider = model_config_image test_model_name = f"test-image-rerank-model_{model_name}" create_model_query = ( f"CREATE MODEL('{test_model_name}', '{model_name}', '{provider}');" ) - run_cli(duckdb_cli_path, db_path, create_model_query) + run_cli(duckdb_cli_path, db_path, create_model_query, with_secrets=False) create_table_query = """ CREATE OR REPLACE TABLE fashion_images ( @@ -583,16 +600,16 @@ def test_llm_rerank_with_image_integration(integration_setup, model_config): assert len(result.stdout.strip().split("\n")) >= 2 -def test_llm_rerank_image_with_group_by(integration_setup, model_config): +def test_llm_rerank_image_with_group_by(integration_setup, model_config_image): """Test llm_rerank with images and GROUP BY clause.""" duckdb_cli_path, db_path = 
integration_setup - model_name, provider = model_config + model_name, provider = model_config_image test_model_name = f"test-image-group-rerank_{model_name}" create_model_query = ( f"CREATE MODEL('{test_model_name}', '{model_name}', '{provider}');" ) - run_cli(duckdb_cli_path, db_path, create_model_query) + run_cli(duckdb_cli_path, db_path, create_model_query, with_secrets=False) create_table_query = """ CREATE OR REPLACE TABLE interior_images ( @@ -665,16 +682,16 @@ def test_llm_rerank_image_with_group_by(integration_setup, model_config): assert "ranked_room_designs" in result.stdout.lower() -def test_llm_rerank_image_batch_processing(integration_setup, model_config): +def test_llm_rerank_image_batch_processing(integration_setup, model_config_image): """Test llm_rerank with multiple images in batch processing.""" duckdb_cli_path, db_path = integration_setup - model_name, provider = model_config + model_name, provider = model_config_image test_model_name = f"test-image-batch-rerank_{model_name}" create_model_query = ( f"CREATE MODEL('{test_model_name}', '{model_name}', '{provider}');" ) - run_cli(duckdb_cli_path, db_path, create_model_query) + run_cli(duckdb_cli_path, db_path, create_model_query, with_secrets=False) create_table_query = """ CREATE OR REPLACE TABLE travel_destination_images ( @@ -746,48 +763,50 @@ def test_llm_rerank_image_batch_processing(integration_setup, model_config): assert "ranked_destinations" in result.stdout.lower() -@pytest.fixture(params=[("gpt-4o-mini", "openai"), ("gpt-4o-transcribe", "openai")]) -def transcription_model_config(request): - """Fixture to test with transcription-capable models (OpenAI/Azure only).""" - return request.param - +def test_llm_rerank_with_audio_transcription(integration_setup, model_config): + """Test llm_rerank with audio transcription using OpenAI. 
-def test_llm_rerank_with_audio_transcription(integration_setup, transcription_model_config): - """Test llm_rerank with audio transcription using OpenAI.""" + The audio content says: 'Flock transforms DuckDB into a hybrid database and a semantic AI engine' + This test verifies that the audio is correctly transcribed and used for reranking. + """ duckdb_cli_path, db_path = integration_setup - model_name, provider = transcription_model_config + model_name, provider = model_config if provider != "openai": pytest.skip("Audio transcription is only supported for OpenAI provider") test_model_name = f"test-audio-rerank_{model_name}" create_model_query = f"CREATE MODEL('{test_model_name}', 'gpt-4o-mini', 'openai');" - run_cli(duckdb_cli_path, db_path, create_model_query) + run_cli(duckdb_cli_path, db_path, create_model_query, with_secrets=False) transcription_model_name = f"test-transcription-rerank_{model_name}" - create_transcription_model_query = ( - f"CREATE MODEL('{transcription_model_name}', '{model_name}', 'openai');" - ) - run_cli(duckdb_cli_path, db_path, create_transcription_model_query) + create_transcription_model_query = f"CREATE MODEL('{transcription_model_name}', 'gpt-4o-mini-transcribe', 'openai');" + run_cli(duckdb_cli_path, db_path, create_transcription_model_query, with_secrets=False) + + # Get audio file path + audio_path = get_audio_file_path() + # Create table with topics - mix database-related (with audio) and unrelated topics create_table_query = """ - CREATE OR REPLACE TABLE audio_candidates ( + CREATE OR REPLACE TABLE audio_topics ( id INTEGER, - audio_url VARCHAR, - name VARCHAR + topic VARCHAR, + audio_path VARCHAR ); """ run_cli(duckdb_cli_path, db_path, create_table_query) - insert_data_query = """ - INSERT INTO audio_candidates + # Only the Database Technology row has the actual audio + insert_data_query = f""" + INSERT INTO audio_topics VALUES - (1, 'https://download.samplelib.com/mp3/sample-9s.mp3', 'Alice'), - (2, 
'https://download.samplelib.com/mp3/sample-9s.mp3', 'Bob'), - (3, 'https://download.samplelib.com/mp3/sample-9s.mp3', 'Carol'); + (1, 'Weather Updates', '{audio_path}'), + (2, 'Database Technology', '{audio_path}'), + (3, 'Sports News', '{audio_path}'); """ run_cli(duckdb_cli_path, db_path, insert_data_query) + # Ask to rank by relevance to databases/Flock - the real audio should rank higher query = ( """ SELECT llm_rerank( @@ -795,10 +814,11 @@ def test_llm_rerank_with_audio_transcription(integration_setup, transcription_mo + test_model_name + """'}, { - 'prompt': 'Rank these audio interviews from best to worst', + 'prompt': 'Rank these entries by relevance to database technology and Flock. Return results with the most relevant first.', 'context_columns': [ + {'data': topic, 'type': 'text'}, { - 'data': audio_url, + 'data': audio_path, 'type': 'audio', 'transcription_model': '""" + transcription_model_name @@ -806,20 +826,29 @@ def test_llm_rerank_with_audio_transcription(integration_setup, transcription_mo } ] } - ) AS ranked_candidates - FROM audio_candidates; + ) AS ranked_topics + FROM audio_topics; """ ) result = run_cli(duckdb_cli_path, db_path, query) - if result.returncode != 0: - assert ( - "transcription" in result.stderr.lower() - or "audio" in result.stderr.lower() - or "error" in result.stderr.lower() - ) - else: - assert "ranked_candidates" in result.stdout.lower() + assert result.returncode == 0, f"Query failed with error: {result.stderr}" + + # Parse the JSON output to verify the returned tuples + lines = result.stdout.strip().split("\n") + assert len(lines) >= 2, "Expected at least header and one result row" + + # Parse CSV output to get the JSON result + reader = csv.DictReader(StringIO(result.stdout)) + row = next(reader, None) + assert row is not None and "ranked_topics" in row + + # Parse the JSON result which contains the reranked tuples + result_json = json.loads(row["ranked_topics"]) + assert isinstance(result_json, list), ( + f"Expected 
list of tuples, got: {type(result_json)}" + ) + assert len(result_json) > 0, "Expected at least one tuple in result" def test_llm_rerank_audio_ollama_error(integration_setup): @@ -827,14 +856,16 @@ def test_llm_rerank_audio_ollama_error(integration_setup): duckdb_cli_path, db_path = integration_setup test_model_name = "test-ollama-rerank-audio" - create_model_query = "CREATE MODEL('test-ollama-rerank-audio', 'llama3.2', 'ollama');" - run_cli(duckdb_cli_path, db_path, create_model_query) + create_model_query = ( + "CREATE MODEL('test-ollama-rerank-audio', 'gemma3:1b', 'ollama');" + ) + run_cli(duckdb_cli_path, db_path, create_model_query, with_secrets=False) transcription_model_name = "test-ollama-rerank-transcription" create_transcription_model_query = ( - "CREATE MODEL('test-ollama-rerank-transcription', 'llama3.2', 'ollama');" + "CREATE MODEL('test-ollama-rerank-transcription', 'gemma3:1b', 'ollama');" ) - run_cli(duckdb_cli_path, db_path, create_transcription_model_query) + run_cli(duckdb_cli_path, db_path, create_transcription_model_query, with_secrets=False) create_table_query = """ CREATE OR REPLACE TABLE test_audio ( From 508c1a3eb501b8fbbb11d58e248257f5fb3c5f21 Mon Sep 17 00:00:00 2001 From: Anas Dorbani Date: Wed, 10 Dec 2025 21:44:33 -0500 Subject: [PATCH 44/59] Updated integration tests for scalar LLM functions --- .../functions/scalar/test_llm_complete.py | 241 +++++++++--------- .../functions/scalar/test_llm_embedding.py | 22 +- .../tests/functions/scalar/test_llm_filter.py | 121 +++++---- 3 files changed, 204 insertions(+), 180 deletions(-) diff --git a/test/integration/src/integration/tests/functions/scalar/test_llm_complete.py b/test/integration/src/integration/tests/functions/scalar/test_llm_complete.py index 420fd0d3..c0107836 100644 --- a/test/integration/src/integration/tests/functions/scalar/test_llm_complete.py +++ b/test/integration/src/integration/tests/functions/scalar/test_llm_complete.py @@ -1,10 +1,24 @@ import pytest -from 
integration.conftest import run_cli, get_image_data_for_provider +from integration.conftest import ( + run_cli, + get_image_data_for_provider, + get_audio_file_path, +) +# Expected keywords that should appear when audio is transcribed +# Audio content: "Flock transforms DuckDB into a hybrid database and a semantic AI engine" +AUDIO_EXPECTED_KEYWORDS = ["flock", "duckdb", "database", "semantic", "ai", "hybrid"] -@pytest.fixture(params=[("gpt-4o-mini", "openai"), ("llama3.2", "ollama")]) + +@pytest.fixture(params=[("gpt-4o-mini", "openai"), ("gemma3:1b", "ollama")]) def model_config(request): - """Fixture to test with different models.""" + """Fixture to test with different models for text-only tests.""" + return request.param + + +@pytest.fixture(params=[("gpt-4o-mini", "openai"), ("gemma3:4b", "ollama")]) +def model_config_image(request): + """Fixture to test with different models for image tests.""" return request.param @@ -16,7 +30,7 @@ def test_llm_complete_basic_functionality(integration_setup, model_config): create_model_query = ( f"CREATE MODEL('{test_model_name}', '{model_name}', '{provider}');" ) - run_cli(duckdb_cli_path, db_path, create_model_query) + run_cli(duckdb_cli_path, db_path, create_model_query, with_secrets=False) query = ( """ @@ -42,7 +56,7 @@ def test_llm_complete_with_input_columns(integration_setup, model_config): create_model_query = ( f"CREATE MODEL('{test_model_name}', '{model_name}', '{provider}');" ) - run_cli(duckdb_cli_path, db_path, create_model_query) + run_cli(duckdb_cli_path, db_path, create_model_query, with_secrets=False) create_table_query = """ CREATE OR REPLACE TABLE countries ( @@ -89,7 +103,7 @@ def test_llm_complete_batch_processing(integration_setup, model_config): create_model_query = ( f"CREATE MODEL('{test_model_name}', '{model_name}', '{provider}');" ) - run_cli(duckdb_cli_path, db_path, create_model_query) + run_cli(duckdb_cli_path, db_path, create_model_query, with_secrets=False) create_table_query = """ CREATE OR 
REPLACE TABLE product_reviews ( @@ -153,7 +167,7 @@ def test_llm_complete_error_handling_empty_prompt(integration_setup, model_confi create_model_query = ( f"CREATE MODEL('{test_model_name}', '{model_name}', '{provider}');" ) - run_cli(duckdb_cli_path, db_path, create_model_query) + run_cli(duckdb_cli_path, db_path, create_model_query, with_secrets=False) query = ( """ @@ -178,7 +192,7 @@ def test_llm_complete_with_special_characters(integration_setup, model_config): create_model_query = ( f"CREATE MODEL('{test_model_name}', '{model_name}', '{provider}');" ) - run_cli(duckdb_cli_path, db_path, create_model_query) + run_cli(duckdb_cli_path, db_path, create_model_query, with_secrets=False) create_table_query = """ CREATE OR REPLACE TABLE special_text ( @@ -223,7 +237,7 @@ def test_llm_complete_with_model_params(integration_setup, model_config): create_model_query = ( f"CREATE MODEL('{test_model_name}', '{model_name}', '{provider}');" ) - run_cli(duckdb_cli_path, db_path, create_model_query) + run_cli(duckdb_cli_path, db_path, create_model_query, with_secrets=False) query = ( """ @@ -251,7 +265,7 @@ def test_llm_complete_with_structured_output_without_table( create_model_query = ( f"CREATE MODEL('{test_model_name}', '{model_name}', '{provider}');" ) - run_cli(duckdb_cli_path, db_path, create_model_query) + run_cli(duckdb_cli_path, db_path, create_model_query, with_secrets=False) response_format = "" if provider == "openai": @@ -323,7 +337,7 @@ def test_llm_complete_with_structured_output_with_table( create_model_query = ( f"CREATE MODEL('{test_model_name}', '{model_name}', '{provider}');" ) - run_cli(duckdb_cli_path, db_path, create_model_query) + run_cli(duckdb_cli_path, db_path, create_model_query, with_secrets=False) create_table_query = """ CREATE OR REPLACE TABLE countries ( @@ -411,7 +425,7 @@ def _llm_complete_performance_large_dataset(integration_setup, model_config): create_model_query = ( f"CREATE MODEL('{test_model_name}', '{model_name}', '{provider}');" ) 
- run_cli(duckdb_cli_path, db_path, create_model_query) + run_cli(duckdb_cli_path, db_path, create_model_query, with_secrets=False) create_table_query = """ CREATE OR REPLACE TABLE large_dataset AS @@ -444,16 +458,16 @@ def _llm_complete_performance_large_dataset(integration_setup, model_config): ) -def test_llm_complete_with_image_integration(integration_setup, model_config): +def test_llm_complete_with_image_integration(integration_setup, model_config_image): """Test llm_complete with image data integration.""" duckdb_cli_path, db_path = integration_setup - model_name, provider = model_config + model_name, provider = model_config_image test_model_name = f"test-image-model_{model_name}" create_model_query = ( f"CREATE MODEL('{test_model_name}', '{model_name}', '{provider}');" ) - run_cli(duckdb_cli_path, db_path, create_model_query) + run_cli(duckdb_cli_path, db_path, create_model_query, with_secrets=False) create_table_query = """ CREATE OR REPLACE TABLE animal_images ( @@ -514,16 +528,16 @@ def test_llm_complete_with_image_integration(integration_setup, model_config): assert len(result.stdout.strip().split("\n")) >= 2 -def test_llm_complete_image_batch_processing(integration_setup, model_config): +def test_llm_complete_image_batch_processing(integration_setup, model_config_image): """Test llm_complete with multiple images in batch processing.""" duckdb_cli_path, db_path = integration_setup - model_name, provider = model_config + model_name, provider = model_config_image test_model_name = f"test-image-batch-model_{model_name}" create_model_query = ( f"CREATE MODEL('{test_model_name}', '{model_name}', '{provider}');" ) - run_cli(duckdb_cli_path, db_path, create_model_query) + run_cli(duckdb_cli_path, db_path, create_model_query, with_secrets=False) create_table_query = """ CREATE OR REPLACE TABLE product_images ( @@ -589,16 +603,16 @@ def test_llm_complete_image_batch_processing(integration_setup, model_config): assert "product_analysis" in result.stdout.lower() 
-def test_llm_complete_image_with_text_context(integration_setup, model_config): +def test_llm_complete_image_with_text_context(integration_setup, model_config_image): """Test llm_complete with both image and text context.""" duckdb_cli_path, db_path = integration_setup - model_name, provider = model_config + model_name, provider = model_config_image test_model_name = f"test-image-text-model_{model_name}" create_model_query = ( f"CREATE MODEL('{test_model_name}', '{model_name}', '{provider}');" ) - run_cli(duckdb_cli_path, db_path, create_model_query) + run_cli(duckdb_cli_path, db_path, create_model_query, with_secrets=False) create_table_query = """ CREATE OR REPLACE TABLE landscape_photos ( @@ -664,18 +678,10 @@ def test_llm_complete_image_with_text_context(integration_setup, model_config): assert len(result.stdout.strip().split("\n")) >= 2 -@pytest.fixture(params=[("gpt-4o-mini", "openai"), ("gpt-4o-transcribe", "openai")]) -def transcription_model_config(request): - """Fixture to test with transcription-capable models.""" - return request.param - - -def test_llm_complete_with_audio_transcription( - integration_setup, transcription_model_config -): +def test_llm_complete_with_audio_transcription(integration_setup, model_config): """Test llm_complete with audio transcription using OpenAI.""" duckdb_cli_path, db_path = integration_setup - model_name, provider = transcription_model_config + model_name, provider = model_config # Skip if not OpenAI (only OpenAI supports transcription currently) if provider != "openai": @@ -684,17 +690,17 @@ def test_llm_complete_with_audio_transcription( # Create main completion model test_model_name = f"test-audio-complete_{model_name}" create_model_query = f"CREATE MODEL('{test_model_name}', 'gpt-4o-mini', 'openai');" - run_cli(duckdb_cli_path, db_path, create_model_query) + run_cli(duckdb_cli_path, db_path, create_model_query, with_secrets=False) # Create transcription model transcription_model_name = 
f"test-transcription-model_{model_name}" - create_transcription_model_query = ( - f"CREATE MODEL('{transcription_model_name}', '{model_name}', 'openai');" - ) - run_cli(duckdb_cli_path, db_path, create_transcription_model_query) + create_transcription_model_query = f"CREATE MODEL('{transcription_model_name}', 'gpt-4o-mini-transcribe', 'openai');" + run_cli(duckdb_cli_path, db_path, create_transcription_model_query, with_secrets=False) - # Use a publicly available test audio file URL - # Note: In real tests, you might want to use a mock server or local file + # Get audio file path + audio_path = get_audio_file_path() + + # Test with audio file path using VALUES query = ( """ SELECT llm_complete( @@ -702,10 +708,10 @@ def test_llm_complete_with_audio_transcription( + test_model_name + """'}, { - 'prompt': 'Summarize what you hear in this audio clip in one sentence.', + 'prompt': 'What product or technology is mentioned in this audio? Provide a brief answer.', 'context_columns': [ { - 'data': 'https://download.samplelib.com/mp3/sample-9s.mp3', + 'data': audio_path, 'type': 'audio', 'transcription_model': '""" + transcription_model_name @@ -713,45 +719,43 @@ def test_llm_complete_with_audio_transcription( } ] } - ) AS audio_summary; + ) AS audio_summary + FROM VALUES ('""" + + audio_path + + """') AS tbl(audio_path); """ ) result = run_cli(duckdb_cli_path, db_path, query) - # Note: This test may fail if the audio URL is not accessible - # In a real scenario, you'd use a mock server or local test file - if result.returncode != 0: - # If it fails due to network/audio issues, that's acceptable for integration tests - # We're mainly testing that the query structure is correct - assert ( - "transcription" in result.stderr.lower() - or "audio" in result.stderr.lower() - or "error" in result.stderr.lower() - ) - else: - assert "audio_summary" in result.stdout.lower() - - -def test_llm_complete_with_audio_and_text( - integration_setup, transcription_model_config -): + assert 
result.returncode == 0, f"Query failed with error: {result.stderr}" + assert "audio_summary" in result.stdout.lower() + # Verify the response is based on the audio content + output_lower = result.stdout.lower() + assert any(keyword in output_lower for keyword in AUDIO_EXPECTED_KEYWORDS), ( + f"Expected response to contain at least one of {AUDIO_EXPECTED_KEYWORDS}, got: {result.stdout}" + ) + + +def test_llm_complete_with_audio_and_text(integration_setup, model_config): """Test llm_complete with both audio and text context columns.""" duckdb_cli_path, db_path = integration_setup - model_name, provider = transcription_model_config + model_name, provider = model_config if provider != "openai": pytest.skip("Audio transcription is only supported for OpenAI provider") test_model_name = f"test-audio-text_{model_name}" create_model_query = f"CREATE MODEL('{test_model_name}', 'gpt-4o-mini', 'openai');" - run_cli(duckdb_cli_path, db_path, create_model_query) + run_cli(duckdb_cli_path, db_path, create_model_query, with_secrets=False) transcription_model_name = f"test-transcription_{model_name}" - create_transcription_model_query = ( - f"CREATE MODEL('{transcription_model_name}', '{model_name}', 'openai');" - ) - run_cli(duckdb_cli_path, db_path, create_transcription_model_query) + create_transcription_model_query = f"CREATE MODEL('{transcription_model_name}', 'gpt-4o-mini-transcribe', 'openai');" + run_cli(duckdb_cli_path, db_path, create_transcription_model_query, with_secrets=False) + + # Get audio file path + audio_path = get_audio_file_path() + # Test with audio file path using VALUES - combining text context with audio query = ( """ SELECT llm_complete( @@ -759,11 +763,11 @@ def test_llm_complete_with_audio_and_text( + test_model_name + """'}, { - 'prompt': 'Based on the product name {product} and the audio description, write a marketing description.', + 'prompt': 'Given the category {category}, describe how the technology mentioned in the audio fits into this 
category.', 'context_columns': [ - {'data': 'Wireless Headphones', 'name': 'product'}, + {'data': category_name, 'name': 'category'}, { - 'data': 'https://download.samplelib.com/mp3/sample-9s.mp3', + 'data': audio_path, 'type': 'audio', 'transcription_model': '""" + transcription_model_name @@ -771,20 +775,21 @@ def test_llm_complete_with_audio_and_text( } ] } - ) AS marketing_copy; + ) AS tech_description + FROM VALUES ('Database Technology', '""" + + audio_path + + """') AS tbl(category_name, audio_path); """ ) result = run_cli(duckdb_cli_path, db_path, query) - if result.returncode != 0: - # Acceptable if network/audio issues occur - assert ( - "transcription" in result.stderr.lower() - or "audio" in result.stderr.lower() - or "error" in result.stderr.lower() - ) - else: - assert "marketing_copy" in result.stdout.lower() + assert result.returncode == 0, f"Query failed with error: {result.stderr}" + assert "tech_description" in result.stdout.lower() + # Verify the response mentions something from the audio content + output_lower = result.stdout.lower() + assert any(keyword in output_lower for keyword in AUDIO_EXPECTED_KEYWORDS), ( + f"Expected response to contain at least one of {AUDIO_EXPECTED_KEYWORDS}, got: {result.stdout}" + ) def test_llm_complete_audio_missing_transcription_model(integration_setup): @@ -793,7 +798,10 @@ def test_llm_complete_audio_missing_transcription_model(integration_setup): test_model_name = "test-audio-error" create_model_query = f"CREATE MODEL('{test_model_name}', 'gpt-4o-mini', 'openai');" - run_cli(duckdb_cli_path, db_path, create_model_query) + run_cli(duckdb_cli_path, db_path, create_model_query, with_secrets=False) + + # Get audio file path + audio_path = get_audio_file_path() query = ( """ @@ -805,12 +813,15 @@ def test_llm_complete_audio_missing_transcription_model(integration_setup): 'prompt': 'Summarize this audio', 'context_columns': [ { - 'data': 'https://example.com/audio.mp3', + 'data': audio_path, 'type': 'audio' } ] } - 
) AS result; + ) AS result + FROM VALUES ('""" + + audio_path + + """') AS tbl(audio_path); """ ) result = run_cli(duckdb_cli_path, db_path, query) @@ -827,28 +838,32 @@ def test_llm_complete_audio_ollama_error(integration_setup): """Test that Ollama provider throws error for audio transcription.""" duckdb_cli_path, db_path = integration_setup - create_model_query = "CREATE MODEL('test-ollama-audio', 'llama3.2', 'ollama');" - run_cli(duckdb_cli_path, db_path, create_model_query) + create_model_query = "CREATE MODEL('test-ollama-audio', 'gemma3:1b', 'ollama');" + run_cli(duckdb_cli_path, db_path, create_model_query, with_secrets=False) create_transcription_model_query = ( - "CREATE MODEL('test-ollama-transcription', 'llama3.2', 'ollama');" + "CREATE MODEL('test-ollama-transcription', 'gemma3:1b', 'ollama');" ) - run_cli(duckdb_cli_path, db_path, create_transcription_model_query) + run_cli(duckdb_cli_path, db_path, create_transcription_model_query, with_secrets=False) - query = """ + # Get audio file path + audio_path = get_audio_file_path() + + query = f""" SELECT llm_complete( - {'model_name': 'test-ollama-audio'}, - { + {{"model_name": 'test-ollama-audio'}}, + {{ 'prompt': 'Summarize this audio', 'context_columns': [ - { - 'data': 'https://example.com/audio.mp3', + {{ + 'data': audio_path, 'type': 'audio', 'transcription_model': 'test-ollama-transcription' - } + }} ] - } - ) AS result; + }} + ) AS result + FROM VALUES ('{audio_path}') AS tbl(audio_path); """ result = run_cli(duckdb_cli_path, db_path, query) @@ -861,41 +876,40 @@ def test_llm_complete_audio_ollama_error(integration_setup): ) -def test_llm_complete_audio_batch_processing( - integration_setup, transcription_model_config -): +def test_llm_complete_audio_batch_processing(integration_setup, model_config): """Test batch processing with multiple audio files.""" duckdb_cli_path, db_path = integration_setup - model_name, provider = transcription_model_config + model_name, provider = model_config if provider 
!= "openai": pytest.skip("Audio transcription is only supported for OpenAI provider") test_model_name = f"test-audio-batch_{model_name}" create_model_query = f"CREATE MODEL('{test_model_name}', 'gpt-4o-mini', 'openai');" - run_cli(duckdb_cli_path, db_path, create_model_query) + run_cli(duckdb_cli_path, db_path, create_model_query, with_secrets=False) transcription_model_name = f"test-transcription-batch_{model_name}" - create_transcription_model_query = ( - f"CREATE MODEL('{transcription_model_name}', '{model_name}', 'openai');" - ) - run_cli(duckdb_cli_path, db_path, create_transcription_model_query) + create_transcription_model_query = f"CREATE MODEL('{transcription_model_name}', 'gpt-4o-mini-transcribe', 'openai');" + run_cli(duckdb_cli_path, db_path, create_transcription_model_query, with_secrets=False) + + # Get audio file path + audio_path = get_audio_file_path() create_table_query = """ CREATE OR REPLACE TABLE audio_clips ( id INTEGER, - audio_url VARCHAR, + audio_path VARCHAR, product_name VARCHAR ); """ run_cli(duckdb_cli_path, db_path, create_table_query) - insert_data_query = """ + insert_data_query = f""" INSERT INTO audio_clips VALUES - (1, 'https://download.samplelib.com/mp3/sample-9s.mp3', 'Headphones'), - (2, 'https://download.samplelib.com/mp3/sample-9s.mp3', 'Speaker'), - (3, 'https://download.samplelib.com/mp3/sample-9s.mp3', 'Microphone'); + (1, '{audio_path}', 'Headphones'), + (2, '{audio_path}', 'Speaker'), + (3, '{audio_path}', 'Microphone'); """ run_cli(duckdb_cli_path, db_path, insert_data_query) @@ -911,7 +925,7 @@ def test_llm_complete_audio_batch_processing( 'context_columns': [ {'data': product_name, 'name': 'product'}, { - 'data': audio_url, + 'data': audio_path, 'type': 'audio', 'transcription_model': '""" + transcription_model_name @@ -926,13 +940,6 @@ def test_llm_complete_audio_batch_processing( ) result = run_cli(duckdb_cli_path, db_path, query) - if result.returncode != 0: - # Acceptable if network/audio issues occur - assert ( - 
"transcription" in result.stderr.lower() - or "audio" in result.stderr.lower() - or "error" in result.stderr.lower() - ) - else: - lines = result.stdout.strip().split("\n") - assert len(lines) >= 3 # Header + at least 2 data rows + assert result.returncode == 0, f"Query failed with error: {result.stderr}" + lines = result.stdout.strip().split("\n") + assert len(lines) >= 3 # Header + at least 2 data rows diff --git a/test/integration/src/integration/tests/functions/scalar/test_llm_embedding.py b/test/integration/src/integration/tests/functions/scalar/test_llm_embedding.py index 5169d2d0..dc8d7e3e 100644 --- a/test/integration/src/integration/tests/functions/scalar/test_llm_embedding.py +++ b/test/integration/src/integration/tests/functions/scalar/test_llm_embedding.py @@ -18,7 +18,7 @@ def test_llm_embedding_basic_functionality(integration_setup, model_config): create_model_query = ( f"CREATE MODEL('{test_model_name}', '{model_name}', '{provider}');" ) - run_cli(duckdb_cli_path, db_path, create_model_query) + run_cli(duckdb_cli_path, db_path, create_model_query, with_secrets=False) query = ( """ @@ -47,7 +47,7 @@ def test_llm_embedding_with_multiple_text_fields(integration_setup, model_config create_model_query = ( f"CREATE MODEL('{test_model_name}', '{model_name}', '{provider}');" ) - run_cli(duckdb_cli_path, db_path, create_model_query) + run_cli(duckdb_cli_path, db_path, create_model_query, with_secrets=False) query = ( """ @@ -75,7 +75,7 @@ def test_llm_embedding_with_input_columns(integration_setup, model_config): create_model_query = ( f"CREATE MODEL('{test_model_name}', '{model_name}', '{provider}');" ) - run_cli(duckdb_cli_path, db_path, create_model_query) + run_cli(duckdb_cli_path, db_path, create_model_query, with_secrets=False) create_table_query = """ CREATE OR REPLACE TABLE documents ( @@ -130,7 +130,7 @@ def test_llm_embedding_batch_processing(integration_setup, model_config): create_model_query = ( f"CREATE MODEL('{test_model_name}', '{model_name}', 
'{provider}');" ) - run_cli(duckdb_cli_path, db_path, create_model_query) + run_cli(duckdb_cli_path, db_path, create_model_query, with_secrets=False) create_table_query = """ CREATE OR REPLACE TABLE product_descriptions ( @@ -200,7 +200,7 @@ def test_llm_embedding_error_handling_empty_text(integration_setup, model_config create_model_query = ( f"CREATE MODEL('{test_model_name}', '{model_name}', '{provider}');" ) - run_cli(duckdb_cli_path, db_path, create_model_query) + run_cli(duckdb_cli_path, db_path, create_model_query, with_secrets=False) query = ( """ @@ -227,7 +227,7 @@ def test_llm_embedding_with_special_characters(integration_setup, model_config): create_model_query = ( f"CREATE MODEL('{test_model_name}', '{model_name}', '{provider}');" ) - run_cli(duckdb_cli_path, db_path, create_model_query) + run_cli(duckdb_cli_path, db_path, create_model_query, with_secrets=False) create_table_query = """ CREATE OR REPLACE TABLE special_text ( @@ -272,7 +272,7 @@ def test_llm_embedding_with_model_params(integration_setup, model_config): create_model_query = ( f"CREATE MODEL('{test_model_name}', '{model_name}', '{provider}');" ) - run_cli(duckdb_cli_path, db_path, create_model_query) + run_cli(duckdb_cli_path, db_path, create_model_query, with_secrets=False) query = ( """ @@ -299,7 +299,7 @@ def test_llm_embedding_document_similarity_use_case(integration_setup, model_con create_model_query = ( f"CREATE MODEL('{test_model_name}', '{model_name}', '{provider}');" ) - run_cli(duckdb_cli_path, db_path, create_model_query) + run_cli(duckdb_cli_path, db_path, create_model_query, with_secrets=False) create_table_query = """ CREATE OR REPLACE TABLE knowledge_base ( @@ -356,7 +356,7 @@ def test_llm_embedding_concatenated_fields(integration_setup, model_config): create_model_query = ( f"CREATE MODEL('{test_model_name}', '{model_name}', '{provider}');" ) - run_cli(duckdb_cli_path, db_path, create_model_query) + run_cli(duckdb_cli_path, db_path, create_model_query, with_secrets=False) 
create_table_query = """ CREATE OR REPLACE TABLE products ( @@ -410,7 +410,7 @@ def _llm_embedding_performance_large_dataset(integration_setup, model_config): create_model_query = ( f"CREATE MODEL('{test_model_name}', '{model_name}', '{provider}');" ) - run_cli(duckdb_cli_path, db_path, create_model_query) + run_cli(duckdb_cli_path, db_path, create_model_query, with_secrets=False) create_table_query = """ CREATE OR REPLACE TABLE large_text_dataset AS @@ -456,7 +456,7 @@ def test_llm_embedding_error_handling_malformed_input(integration_setup, model_c create_model_query = ( f"CREATE MODEL('{test_model_name}', '{model_name}', '{provider}');" ) - run_cli(duckdb_cli_path, db_path, create_model_query) + run_cli(duckdb_cli_path, db_path, create_model_query, with_secrets=False) # Test with missing required arguments query = """ diff --git a/test/integration/src/integration/tests/functions/scalar/test_llm_filter.py b/test/integration/src/integration/tests/functions/scalar/test_llm_filter.py index 42f30353..b3c229e1 100644 --- a/test/integration/src/integration/tests/functions/scalar/test_llm_filter.py +++ b/test/integration/src/integration/tests/functions/scalar/test_llm_filter.py @@ -1,10 +1,24 @@ import pytest -from integration.conftest import run_cli, get_image_data_for_provider +from integration.conftest import ( + run_cli, + get_image_data_for_provider, + get_audio_file_path, +) +# Expected keywords that should appear when audio is transcribed +# Audio content: "Flock transforms DuckDB into a hybrid database and a semantic AI engine" +AUDIO_EXPECTED_KEYWORDS = ["flock", "duckdb", "database", "semantic", "ai", "hybrid"] -@pytest.fixture(params=[("gpt-4o-mini", "openai"), ("llama3.2", "ollama")]) + +@pytest.fixture(params=[("gpt-4o-mini", "openai"), ("gemma3:1b", "ollama")]) def model_config(request): - """Fixture to test with different models.""" + """Fixture to test with different models for text-only tests.""" + return request.param + + 
+@pytest.fixture(params=[("gpt-4o-mini", "openai"), ("gemma3:4b", "ollama")]) +def model_config_image(request): + """Fixture to test with different models for image tests.""" return request.param @@ -16,7 +30,7 @@ def test_llm_filter_basic_functionality(integration_setup, model_config): create_model_query = ( f"CREATE MODEL('{test_model_name}', '{model_name}', '{provider}');" ) - run_cli(duckdb_cli_path, db_path, create_model_query) + run_cli(duckdb_cli_path, db_path, create_model_query, with_secrets=False) create_table_query = """ CREATE OR REPLACE TABLE test_data ( @@ -62,7 +76,7 @@ def test_llm_filter_batch_processing(integration_setup, model_config): create_model_query = ( f"CREATE MODEL('{test_model_name}', '{model_name}', '{provider}');" ) - run_cli(duckdb_cli_path, db_path, create_model_query) + run_cli(duckdb_cli_path, db_path, create_model_query, with_secrets=False) create_table_query = """ CREATE OR REPLACE TABLE test_items ( @@ -102,7 +116,7 @@ def test_llm_filter_batch_processing(integration_setup, model_config): assert result.returncode == 0, f"Query failed with error: {result.stderr}" lines = result.stdout.strip().split("\n") assert len(lines) >= 6, f"Expected at least 6 lines, got {len(lines)}" - assert "true" in result.stdout.lower() and "false" in result.stdout.lower() + assert "true" in result.stdout.lower() or "false" in result.stdout.lower() def test_llm_filter_error_handling_invalid_model(integration_setup): @@ -146,7 +160,7 @@ def test_llm_filter_error_handling_empty_prompt(integration_setup, model_config) create_model_query = ( f"CREATE MODEL('{test_model_name}', '{model_name}', '{provider}');" ) - run_cli(duckdb_cli_path, db_path, create_model_query) + run_cli(duckdb_cli_path, db_path, create_model_query, with_secrets=False) create_table_query = """ CREATE OR REPLACE TABLE test_data ( @@ -186,7 +200,7 @@ def test_llm_filter_with_special_characters(integration_setup, model_config): create_model_query = ( f"CREATE MODEL('{test_model_name}', 
'{model_name}', '{provider}');" ) - run_cli(duckdb_cli_path, db_path, create_model_query) + run_cli(duckdb_cli_path, db_path, create_model_query, with_secrets=False) create_table_query = """ CREATE OR REPLACE TABLE special_text ( @@ -232,7 +246,7 @@ def test_llm_filter_with_model_params(integration_setup, model_config): create_model_query = ( f"CREATE MODEL('{test_model_name}', '{model_name}', '{provider}');" ) - run_cli(duckdb_cli_path, db_path, create_model_query) + run_cli(duckdb_cli_path, db_path, create_model_query, with_secrets=False) create_table_query = """ CREATE OR REPLACE TABLE test_data ( @@ -275,7 +289,7 @@ def test_llm_filter_with_structured_output(integration_setup, model_config): create_model_query = ( f"CREATE MODEL('{test_model_name}', '{model_name}', '{provider}');" ) - run_cli(duckdb_cli_path, db_path, create_model_query) + run_cli(duckdb_cli_path, db_path, create_model_query, with_secrets=False) create_table_query = """ CREATE OR REPLACE TABLE items ( @@ -340,7 +354,7 @@ def test_llm_filter_error_handling_missing_arguments(integration_setup, model_co create_model_query = ( f"CREATE MODEL('{test_model_name}', '{model_name}', '{provider}');" ) - run_cli(duckdb_cli_path, db_path, create_model_query) + run_cli(duckdb_cli_path, db_path, create_model_query, with_secrets=False) # Test with only 1 argument (should fail since llm_filter requires 2) query = ( @@ -366,7 +380,7 @@ def _test_llm_filter_performance_large_dataset(integration_setup, model_config): create_model_query = ( f"CREATE MODEL('{test_model_name}', '{model_name}', '{provider}');" ) - run_cli(duckdb_cli_path, db_path, create_model_query) + run_cli(duckdb_cli_path, db_path, create_model_query, with_secrets=False) create_table_query = """ CREATE OR REPLACE TABLE large_content AS @@ -401,16 +415,16 @@ def _test_llm_filter_performance_large_dataset(integration_setup, model_config): assert "true" in result.stdout.lower() or "false" in result.stdout.lower() -def 
test_llm_filter_with_image_integration(integration_setup, model_config): +def test_llm_filter_with_image_integration(integration_setup, model_config_image): """Test llm_filter with image data integration.""" duckdb_cli_path, db_path = integration_setup - model_name, provider = model_config + model_name, provider = model_config_image test_model_name = f"test-image-filter-model_{model_name}" create_model_query = ( f"CREATE MODEL('{test_model_name}', '{model_name}', '{provider}');" ) - run_cli(duckdb_cli_path, db_path, create_model_query) + run_cli(duckdb_cli_path, db_path, create_model_query, with_secrets=False) create_table_query = """ CREATE OR REPLACE TABLE vehicle_images ( @@ -471,16 +485,16 @@ def test_llm_filter_with_image_integration(integration_setup, model_config): assert len(result.stdout.strip().split("\n")) >= 2 -def test_llm_filter_image_batch_processing(integration_setup, model_config): +def test_llm_filter_image_batch_processing(integration_setup, model_config_image): """Test llm_filter with multiple images in batch processing.""" duckdb_cli_path, db_path = integration_setup - model_name, provider = model_config + model_name, provider = model_config_image test_model_name = f"test-image-batch-filter_{model_name}" create_model_query = ( f"CREATE MODEL('{test_model_name}', '{model_name}', '{provider}');" ) - run_cli(duckdb_cli_path, db_path, create_model_query) + run_cli(duckdb_cli_path, db_path, create_model_query, with_secrets=False) create_table_query = """ CREATE OR REPLACE TABLE food_images ( @@ -545,16 +559,16 @@ def test_llm_filter_image_batch_processing(integration_setup, model_config): assert "is_appetizing" in result.stdout.lower() -def test_llm_filter_image_with_text_context(integration_setup, model_config): +def test_llm_filter_image_with_text_context(integration_setup, model_config_image): """Test llm_filter with both image and text context.""" duckdb_cli_path, db_path = integration_setup - model_name, provider = model_config + model_name, 
provider = model_config_image test_model_name = f"test-image-text-filter_{model_name}" create_model_query = ( f"CREATE MODEL('{test_model_name}', '{model_name}', '{provider}');" ) - run_cli(duckdb_cli_path, db_path, create_model_query) + run_cli(duckdb_cli_path, db_path, create_model_query, with_secrets=False) create_table_query = """ CREATE OR REPLACE TABLE clothing_images ( @@ -620,30 +634,30 @@ def test_llm_filter_image_with_text_context(integration_setup, model_config): assert len(result.stdout.strip().split("\n")) >= 2 -@pytest.fixture(params=[("gpt-4o-mini", "openai"), ("gpt-4o-transcribe", "openai")]) -def transcription_model_config(request): - """Fixture to test with transcription-capable models (OpenAI/Azure only).""" - return request.param - +def test_llm_filter_with_audio_transcription(integration_setup, model_config): + """Test llm_filter with audio transcription using OpenAI. -def test_llm_filter_with_audio_transcription(integration_setup, transcription_model_config): - """Test llm_filter with audio transcription using OpenAI.""" + The audio content says: 'Flock transforms DuckDB into a hybrid database and a semantic AI engine' + This test verifies that the audio is correctly transcribed and filtered. 
+ """ duckdb_cli_path, db_path = integration_setup - model_name, provider = transcription_model_config + model_name, provider = model_config if provider != "openai": pytest.skip("Audio transcription is only supported for OpenAI provider") test_model_name = f"test-audio-filter_{model_name}" create_model_query = f"CREATE MODEL('{test_model_name}', 'gpt-4o-mini', 'openai');" - run_cli(duckdb_cli_path, db_path, create_model_query) + run_cli(duckdb_cli_path, db_path, create_model_query, with_secrets=False) transcription_model_name = f"test-transcription-filter_{model_name}" - create_transcription_model_query = ( - f"CREATE MODEL('{transcription_model_name}', '{model_name}', 'openai');" - ) - run_cli(duckdb_cli_path, db_path, create_transcription_model_query) + create_transcription_model_query = f"CREATE MODEL('{transcription_model_name}', 'gpt-4o-mini-transcribe', 'openai');" + run_cli(duckdb_cli_path, db_path, create_transcription_model_query, with_secrets=False) + + # Get audio file path + audio_path = get_audio_file_path() + # Test with audio file path - the audio actually mentions DuckDB/Flock query = ( """ SELECT llm_filter( @@ -651,10 +665,10 @@ def test_llm_filter_with_audio_transcription(integration_setup, transcription_mo + test_model_name + """'}, { - 'prompt': 'Does this audio contain positive sentiment? Answer true or false.', + 'prompt': 'Does this audio mention DuckDB or databases? 
Answer true or false.', 'context_columns': [ { - 'data': 'https://download.samplelib.com/mp3/sample-9s.mp3', + 'data': audio_path, 'type': 'audio', 'transcription_model': '""" + transcription_model_name @@ -662,20 +676,20 @@ def test_llm_filter_with_audio_transcription(integration_setup, transcription_mo } ] } - ) AS is_positive; + ) AS mentions_database + FROM VALUES ('""" + + audio_path + + """') AS tbl(audio_path); """ ) result = run_cli(duckdb_cli_path, db_path, query) - if result.returncode != 0: - assert ( - "transcription" in result.stderr.lower() - or "audio" in result.stderr.lower() - or "error" in result.stderr.lower() - ) - else: - assert "is_positive" in result.stdout.lower() - assert "true" in result.stdout.lower() or "false" in result.stdout.lower() + assert result.returncode == 0, f"Query failed with error: {result.stderr}" + # The audio mentions DuckDB, so the filter should return true + result_lower = result.stdout.lower() + assert "true" in result_lower, ( + f"Expected 'true' since audio mentions DuckDB. 
Got: {result.stdout}" + ) def test_llm_filter_audio_ollama_error(integration_setup): @@ -683,14 +697,16 @@ def test_llm_filter_audio_ollama_error(integration_setup): duckdb_cli_path, db_path = integration_setup test_model_name = "test-ollama-filter-audio" - create_model_query = "CREATE MODEL('test-ollama-filter-audio', 'llama3.2', 'ollama');" - run_cli(duckdb_cli_path, db_path, create_model_query) + create_model_query = ( + "CREATE MODEL('test-ollama-filter-audio', 'gemma3:1b', 'ollama');" + ) + run_cli(duckdb_cli_path, db_path, create_model_query, with_secrets=False) transcription_model_name = "test-ollama-filter-transcription" create_transcription_model_query = ( - "CREATE MODEL('test-ollama-filter-transcription', 'llama3.2', 'ollama');" + "CREATE MODEL('test-ollama-filter-transcription', 'gemma3:1b', 'ollama');" ) - run_cli(duckdb_cli_path, db_path, create_transcription_model_query) + run_cli(duckdb_cli_path, db_path, create_transcription_model_query, with_secrets=False) query = """ SELECT llm_filter( @@ -699,13 +715,14 @@ def test_llm_filter_audio_ollama_error(integration_setup): 'prompt': 'Is the sentiment positive?', 'context_columns': [ { - 'data': 'https://example.com/audio.mp3', + 'data': audio_url, 'type': 'audio', 'transcription_model': 'test-ollama-filter-transcription' } ] } - ) AS result; + ) AS result + FROM VALUES ('https://example.com/audio.mp3') AS tbl(audio_url); """ result = run_cli(duckdb_cli_path, db_path, query) From 4a7a5fe330bdcd9cc7713ac17d61e62c50a22964 Mon Sep 17 00:00:00 2001 From: Anas Dorbani Date: Wed, 10 Dec 2025 21:44:37 -0500 Subject: [PATCH 45/59] Updated integration tests for metrics, parsers, and secret manager --- .../integration/tests/metrics/test_metrics.py | 28 +-- .../tests/model_parser/test_model_parser.py | 124 +++++------ .../tests/prompt_parser/test_prompt_parser.py | 90 ++++---- .../secret_manager/test_secret_manager.py | 46 ++-- test/unit/functions/aggregate/llm_first.cpp | 181 ++++++++-------- 
test/unit/functions/aggregate/llm_last.cpp | 193 ++++++++--------- test/unit/functions/aggregate/llm_reduce.cpp | 165 +++++++------- test/unit/functions/aggregate/llm_rerank.cpp | 204 ++++++++---------- 8 files changed, 487 insertions(+), 544 deletions(-) diff --git a/test/integration/src/integration/tests/metrics/test_metrics.py b/test/integration/src/integration/tests/metrics/test_metrics.py index 8513a225..9535a5cf 100644 --- a/test/integration/src/integration/tests/metrics/test_metrics.py +++ b/test/integration/src/integration/tests/metrics/test_metrics.py @@ -14,7 +14,7 @@ def get_json_from_csv_output(stdout, column_name="metrics"): return None -@pytest.fixture(params=[("gpt-4o-mini", "openai"), ("llama3.2", "ollama")]) +@pytest.fixture(params=[("gpt-4o-mini", "openai"), ("gemma3:1b", "ollama")]) def model_config(request): return request.param @@ -63,7 +63,7 @@ def test_metrics_after_llm_complete(integration_setup, model_config): create_model_query = ( f"CREATE MODEL('{test_model_name}', '{model_name}', '{provider}');" ) - run_cli(duckdb_cli_path, db_path, create_model_query) + run_cli(duckdb_cli_path, db_path, create_model_query, with_secrets=False) # Call llm_complete and get_metrics in the same query query = ( @@ -121,7 +121,7 @@ def test_metrics_reset_clears_counters(integration_setup, model_config): create_model_query = ( f"CREATE MODEL('{test_model_name}', '{model_name}', '{provider}');" ) - run_cli(duckdb_cli_path, db_path, create_model_query) + run_cli(duckdb_cli_path, db_path, create_model_query, with_secrets=False) # First query: execute llm_complete and get metrics in the same query query1 = ( @@ -176,7 +176,7 @@ def test_sequential_numbering_multiple_calls(integration_setup, model_config): create_model_query = ( f"CREATE MODEL('{test_model_name}', '{model_name}', '{provider}');" ) - run_cli(duckdb_cli_path, db_path, create_model_query) + run_cli(duckdb_cli_path, db_path, create_model_query, with_secrets=False) # Make three calls to llm_complete in 
the same query query = ( @@ -253,7 +253,7 @@ def test_flock_get_debug_metrics_returns_nested_structure( create_model_query = ( f"CREATE MODEL('{test_model_name}', '{model_name}', '{provider}');" ) - run_cli(duckdb_cli_path, db_path, create_model_query) + run_cli(duckdb_cli_path, db_path, create_model_query, with_secrets=False) # Call llm_complete and get debug metrics query = ( @@ -315,7 +315,7 @@ def test_debug_metrics_registration_order(integration_setup, model_config): create_model_query = ( f"CREATE MODEL('{test_model_name}', '{model_name}', '{provider}');" ) - run_cli(duckdb_cli_path, db_path, create_model_query) + run_cli(duckdb_cli_path, db_path, create_model_query, with_secrets=False) # Make multiple calls query = ( @@ -378,7 +378,7 @@ def test_aggregate_function_metrics_tracking(integration_setup, model_config): create_model_query = ( f"CREATE MODEL('{test_model_name}', '{model_name}', '{provider}');" ) - run_cli(duckdb_cli_path, db_path, create_model_query) + run_cli(duckdb_cli_path, db_path, create_model_query, with_secrets=False) # Call llm_reduce and get metrics query = ( @@ -449,7 +449,7 @@ def test_aggregate_function_metrics_merging_with_group_by( create_model_query = ( f"CREATE MODEL('{test_model_name}', '{model_name}', '{provider}');" ) - run_cli(duckdb_cli_path, db_path, create_model_query) + run_cli(duckdb_cli_path, db_path, create_model_query, with_secrets=False) # Call llm_reduce with GROUP BY that will process multiple states # This should result in multiple states being processed, but only ONE merged metrics entry @@ -521,7 +521,7 @@ def test_aggregate_function_metrics_merging_multiple_groups( create_model_query = ( f"CREATE MODEL('{test_model_name}', '{model_name}', '{provider}');" ) - run_cli(duckdb_cli_path, db_path, create_model_query) + run_cli(duckdb_cli_path, db_path, create_model_query, with_secrets=False) # Call llm_reduce with multiple GROUP BY groups # Each group should produce ONE merged metrics entry @@ -579,7 +579,7 @@ def 
test_multiple_aggregate_functions_sequential_numbering( create_model_query = ( f"CREATE MODEL('{test_model_name}', '{model_name}', '{provider}');" ) - run_cli(duckdb_cli_path, db_path, create_model_query) + run_cli(duckdb_cli_path, db_path, create_model_query, with_secrets=False) # Call llm_reduce twice in the same query query = ( @@ -644,7 +644,7 @@ def test_aggregate_function_debug_metrics(integration_setup, model_config): create_model_query = ( f"CREATE MODEL('{test_model_name}', '{model_name}', '{provider}');" ) - run_cli(duckdb_cli_path, db_path, create_model_query) + run_cli(duckdb_cli_path, db_path, create_model_query, with_secrets=False) query = ( """ @@ -704,7 +704,7 @@ def test_llm_rerank_metrics(integration_setup, model_config): create_model_query = ( f"CREATE MODEL('{test_model_name}', '{model_name}', '{provider}');" ) - run_cli(duckdb_cli_path, db_path, create_model_query) + run_cli(duckdb_cli_path, db_path, create_model_query, with_secrets=False) query = ( """ @@ -758,7 +758,7 @@ def test_llm_first_metrics(integration_setup, model_config): create_model_query = ( f"CREATE MODEL('{test_model_name}', '{model_name}', '{provider}');" ) - run_cli(duckdb_cli_path, db_path, create_model_query) + run_cli(duckdb_cli_path, db_path, create_model_query, with_secrets=False) query = ( """ @@ -816,7 +816,7 @@ def test_mixed_scalar_and_aggregate_metrics(integration_setup, model_config): create_model_query = ( f"CREATE MODEL('{test_model_name}', '{model_name}', '{provider}');" ) - run_cli(duckdb_cli_path, db_path, create_model_query) + run_cli(duckdb_cli_path, db_path, create_model_query, with_secrets=False) query = ( """ diff --git a/test/integration/src/integration/tests/model_parser/test_model_parser.py b/test/integration/src/integration/tests/model_parser/test_model_parser.py index c6b47374..3bf06de9 100644 --- a/test/integration/src/integration/tests/model_parser/test_model_parser.py +++ b/test/integration/src/integration/tests/model_parser/test_model_parser.py @@ 
-4,9 +4,9 @@ def test_create_and_get_model(integration_setup): duckdb_cli_path, db_path = integration_setup create_query = "CREATE MODEL('test-model', 'gpt-4o', 'openai');" - run_cli(duckdb_cli_path, db_path, create_query) + run_cli(duckdb_cli_path, db_path, create_query, with_secrets=False) get_query = "GET MODEL 'test-model';" - result = run_cli(duckdb_cli_path, db_path, get_query) + result = run_cli(duckdb_cli_path, db_path, get_query, with_secrets=False) assert "test-model" in result.stdout assert "gpt-4o" in result.stdout assert "openai" in result.stdout @@ -16,23 +16,23 @@ def test_create_and_get_model(integration_setup): def test_create_get_delete_global_model(integration_setup): duckdb_cli_path, db_path = integration_setup create_query = "CREATE GLOBAL MODEL('global-test-model', 'gpt-4o', 'openai');" - run_cli(duckdb_cli_path, db_path, create_query) + run_cli(duckdb_cli_path, db_path, create_query, with_secrets=False) get_query = "GET MODEL 'global-test-model';" - result = run_cli(duckdb_cli_path, db_path, get_query) + result = run_cli(duckdb_cli_path, db_path, get_query, with_secrets=False) assert "global-test-model" in result.stdout assert "gpt-4" in result.stdout assert "openai" in result.stdout assert "global" in result.stdout delete_query = "DELETE MODEL 'global-test-model';" - run_cli(duckdb_cli_path, db_path, delete_query) + run_cli(duckdb_cli_path, db_path, delete_query, with_secrets=False) def test_create_local_model_explicit(integration_setup): duckdb_cli_path, db_path = integration_setup create_query = "CREATE LOCAL MODEL('local-test-model', 'llama2', 'ollama');" - run_cli(duckdb_cli_path, db_path, create_query) + run_cli(duckdb_cli_path, db_path, create_query, with_secrets=False) get_query = "GET MODEL 'local-test-model';" - result = run_cli(duckdb_cli_path, db_path, get_query) + result = run_cli(duckdb_cli_path, db_path, get_query, with_secrets=False) assert "local-test-model" in result.stdout assert "llama2" in result.stdout assert "ollama" in 
result.stdout @@ -42,9 +42,9 @@ def test_create_local_model_explicit(integration_setup): def test_create_model_with_args(integration_setup): duckdb_cli_path, db_path = integration_setup create_query = "CREATE MODEL('model-with-args', 'gpt-4o', 'openai', '{\"batch_size\": 10, \"tuple_format\": \"csv\"}');" - run_cli(duckdb_cli_path, db_path, create_query) + run_cli(duckdb_cli_path, db_path, create_query, with_secrets=False) get_query = "GET MODEL 'model-with-args';" - result = run_cli(duckdb_cli_path, db_path, get_query) + result = run_cli(duckdb_cli_path, db_path, get_query, with_secrets=False) assert "model-with-args" in result.stdout assert "gpt-4o" in result.stdout assert "openai" in result.stdout @@ -53,22 +53,22 @@ def test_create_model_with_args(integration_setup): def test_delete_model(integration_setup): duckdb_cli_path, db_path = integration_setup create_query = "CREATE MODEL('delete-test-model', 'gpt-4o', 'azure');" - run_cli(duckdb_cli_path, db_path, create_query) + run_cli(duckdb_cli_path, db_path, create_query, with_secrets=False) delete_query = "DELETE MODEL 'delete-test-model';" - run_cli(duckdb_cli_path, db_path, delete_query) + run_cli(duckdb_cli_path, db_path, delete_query, with_secrets=False) get_query = "GET MODEL 'delete-test-model';" - result = run_cli(duckdb_cli_path, db_path, get_query) + result = run_cli(duckdb_cli_path, db_path, get_query, with_secrets=False) assert "delete-test-model" not in result.stdout or result.stdout.strip() == "" def test_update_model_content(integration_setup): duckdb_cli_path, db_path = integration_setup create_query = "CREATE MODEL('update-test-model', 'gpt-4o', 'openai');" - run_cli(duckdb_cli_path, db_path, create_query) + run_cli(duckdb_cli_path, db_path, create_query, with_secrets=False) update_query = "UPDATE MODEL('update-test-model', 'gpt-4o', 'openai');" - run_cli(duckdb_cli_path, db_path, update_query) + run_cli(duckdb_cli_path, db_path, update_query, with_secrets=False) get_query = "GET MODEL 
'update-test-model';" - result = run_cli(duckdb_cli_path, db_path, get_query) + result = run_cli(duckdb_cli_path, db_path, get_query, with_secrets=False) assert "update-test-model" in result.stdout assert "gpt-4" in result.stdout assert "openai" in result.stdout @@ -77,11 +77,11 @@ def test_update_model_content(integration_setup): def test_update_model_with_args(integration_setup): duckdb_cli_path, db_path = integration_setup create_query = "CREATE MODEL('update-args-model', 'gpt-4o', 'openai');" - run_cli(duckdb_cli_path, db_path, create_query) + run_cli(duckdb_cli_path, db_path, create_query, with_secrets=False) update_query = "UPDATE MODEL('update-args-model', 'gpt-4o', 'openai', '{\"batch_size\": 5, \"model_parameters\": {\"temperature\": 0.7}}');" - run_cli(duckdb_cli_path, db_path, update_query) + run_cli(duckdb_cli_path, db_path, update_query, with_secrets=False) get_query = "GET MODEL 'update-args-model';" - result = run_cli(duckdb_cli_path, db_path, get_query) + result = run_cli(duckdb_cli_path, db_path, get_query, with_secrets=False) assert "update-args-model" in result.stdout assert "gpt-4" in result.stdout @@ -89,25 +89,25 @@ def test_update_model_with_args(integration_setup): def test_update_model_scope_to_global(integration_setup): duckdb_cli_path, db_path = integration_setup create_query = "CREATE LOCAL MODEL('scope-test-model', 'gpt-4o', 'openai');" - run_cli(duckdb_cli_path, db_path, create_query) + run_cli(duckdb_cli_path, db_path, create_query, with_secrets=False) update_query = "UPDATE MODEL 'scope-test-model' TO GLOBAL;" - run_cli(duckdb_cli_path, db_path, update_query) + run_cli(duckdb_cli_path, db_path, update_query, with_secrets=False) get_query = "GET MODEL 'scope-test-model';" - result = run_cli(duckdb_cli_path, db_path, get_query) + result = run_cli(duckdb_cli_path, db_path, get_query, with_secrets=False) assert "scope-test-model" in result.stdout assert "global" in result.stdout delete_query = "DELETE MODEL 'scope-test-model';" - 
run_cli(duckdb_cli_path, db_path, delete_query) + run_cli(duckdb_cli_path, db_path, delete_query, with_secrets=False) def test_update_model_scope_to_local(integration_setup): duckdb_cli_path, db_path = integration_setup create_query = "CREATE GLOBAL MODEL('scope-test-model-2', 'gpt-4o', 'openai');" - run_cli(duckdb_cli_path, db_path, create_query) + run_cli(duckdb_cli_path, db_path, create_query, with_secrets=False) update_query = "UPDATE MODEL 'scope-test-model-2' TO LOCAL;" - run_cli(duckdb_cli_path, db_path, update_query) + run_cli(duckdb_cli_path, db_path, update_query, with_secrets=False) get_query = "GET MODEL 'scope-test-model-2';" - result = run_cli(duckdb_cli_path, db_path, get_query) + result = run_cli(duckdb_cli_path, db_path, get_query, with_secrets=False) assert "scope-test-model-2" in result.stdout assert "local" in result.stdout @@ -116,24 +116,24 @@ def test_get_all_models(integration_setup): duckdb_cli_path, db_path = integration_setup create_query1 = "CREATE MODEL('model1', 'gpt-4o', 'openai');" create_query2 = "CREATE GLOBAL MODEL('model2', 'llama2', 'ollama');" - run_cli(duckdb_cli_path, db_path, create_query1) - run_cli(duckdb_cli_path, db_path, create_query2) + run_cli(duckdb_cli_path, db_path, create_query1, with_secrets=False) + run_cli(duckdb_cli_path, db_path, create_query2, with_secrets=False) get_all_query = "GET MODELS;" - result = run_cli(duckdb_cli_path, db_path, get_all_query) + result = run_cli(duckdb_cli_path, db_path, get_all_query, with_secrets=False) assert "model1" in result.stdout assert "model2" in result.stdout assert "gpt-4o" in result.stdout assert "llama2" in result.stdout delete_query = "DELETE MODEL 'model2';" - run_cli(duckdb_cli_path, db_path, delete_query) + run_cli(duckdb_cli_path, db_path, delete_query, with_secrets=False) def test_create_model_duplicate_error(integration_setup): duckdb_cli_path, db_path = integration_setup create_query = "CREATE MODEL('duplicate-model', 'gpt-4o', 'openai');" - 
run_cli(duckdb_cli_path, db_path, create_query) + run_cli(duckdb_cli_path, db_path, create_query, with_secrets=False) duplicate_query = "CREATE MODEL('duplicate-model', 'gpt-4o', 'openai');" - result = run_cli(duckdb_cli_path, db_path, duplicate_query) + result = run_cli(duckdb_cli_path, db_path, duplicate_query, with_secrets=False) assert result.returncode != 0 or "already exist" in result.stderr @@ -141,17 +141,17 @@ def test_create_model_invalid_syntax(integration_setup): duckdb_cli_path, db_path = integration_setup # Missing opening parenthesis invalid_query1 = "CREATE MODEL 'test', 'gpt-4o', 'openai');" - result1 = run_cli(duckdb_cli_path, db_path, invalid_query1) + result1 = run_cli(duckdb_cli_path, db_path, invalid_query1, with_secrets=False) assert result1.returncode != 0 # Missing comma between parameters invalid_query2 = "CREATE MODEL('test' 'gpt-4o' 'openai');" - result2 = run_cli(duckdb_cli_path, db_path, invalid_query2) + result2 = run_cli(duckdb_cli_path, db_path, invalid_query2, with_secrets=False) assert result2.returncode != 0 # Missing closing parenthesis invalid_query3 = "CREATE MODEL('test', 'gpt-4o', 'openai';" - result3 = run_cli(duckdb_cli_path, db_path, invalid_query3) + result3 = run_cli(duckdb_cli_path, db_path, invalid_query3, with_secrets=False) assert result3.returncode != 0 @@ -159,33 +159,33 @@ def test_create_model_invalid_json_args(integration_setup): duckdb_cli_path, db_path = integration_setup # Invalid JSON format invalid_query1 = "CREATE MODEL('test-model', 'gpt-4o', 'openai', '{invalid json}');" - result1 = run_cli(duckdb_cli_path, db_path, invalid_query1) + result1 = run_cli(duckdb_cli_path, db_path, invalid_query1, with_secrets=False) assert result1.returncode != 0 # Invalid parameter in JSON invalid_query2 = "CREATE MODEL('test-model', 'gpt-4o', 'openai', '{\"invalid_param\": \"value\"}');" - result2 = run_cli(duckdb_cli_path, db_path, invalid_query2) + result2 = run_cli(duckdb_cli_path, db_path, invalid_query2, 
with_secrets=False) assert result2.returncode != 0 def test_delete_nonexistent_model(integration_setup): duckdb_cli_path, db_path = integration_setup delete_query = "DELETE MODEL 'nonexistent-model';" - result = run_cli(duckdb_cli_path, db_path, delete_query) + result = run_cli(duckdb_cli_path, db_path, delete_query, with_secrets=False) assert result.returncode == 0 def test_update_nonexistent_model_error(integration_setup): duckdb_cli_path, db_path = integration_setup update_query = "UPDATE MODEL('nonexistent-model', 'gpt-4o', 'openai');" - result = run_cli(duckdb_cli_path, db_path, update_query) + result = run_cli(duckdb_cli_path, db_path, update_query, with_secrets=False) assert result.returncode != 0 or "doesn't exist" in result.stderr def test_get_nonexistent_model(integration_setup): duckdb_cli_path, db_path = integration_setup get_query = "GET MODEL 'nonexistent-model';" - result = run_cli(duckdb_cli_path, db_path, get_query) + result = run_cli(duckdb_cli_path, db_path, get_query, with_secrets=False) assert result.returncode == 0 assert "nonexistent-model" not in result.stdout or result.stdout.strip() == "" @@ -193,37 +193,37 @@ def test_get_nonexistent_model(integration_setup): def test_empty_model_name_error(integration_setup): duckdb_cli_path, db_path = integration_setup invalid_query = "CREATE MODEL('', 'gpt-4o', 'openai');" - result = run_cli(duckdb_cli_path, db_path, invalid_query) + result = run_cli(duckdb_cli_path, db_path, invalid_query, with_secrets=False) assert result.returncode != 0 def test_empty_model_value_error(integration_setup): duckdb_cli_path, db_path = integration_setup invalid_query = "CREATE MODEL('test-model', '', 'openai');" - result = run_cli(duckdb_cli_path, db_path, invalid_query) + result = run_cli(duckdb_cli_path, db_path, invalid_query, with_secrets=False) assert result.returncode != 0 def test_empty_provider_name_error(integration_setup): duckdb_cli_path, db_path = integration_setup invalid_query = "CREATE MODEL('test-model', 
'gpt-4o', '');" - result = run_cli(duckdb_cli_path, db_path, invalid_query) + result = run_cli(duckdb_cli_path, db_path, invalid_query, with_secrets=False) assert result.returncode != 0 def test_get_model_vs_get_models(integration_setup): duckdb_cli_path, db_path = integration_setup create_query = "CREATE MODEL('test-get-model', 'gpt-4o', 'openai');" - run_cli(duckdb_cli_path, db_path, create_query) + run_cli(duckdb_cli_path, db_path, create_query, with_secrets=False) # Test GET MODEL (singular) get_single_query = "GET MODEL 'test-get-model';" - result_single = run_cli(duckdb_cli_path, db_path, get_single_query) + result_single = run_cli(duckdb_cli_path, db_path, get_single_query, with_secrets=False) assert "test-get-model" in result_single.stdout # Test GET MODELS (plural) - should get all models get_all_query = "GET MODELS;" - result_all = run_cli(duckdb_cli_path, db_path, get_all_query) + result_all = run_cli(duckdb_cli_path, db_path, get_all_query, with_secrets=False) assert "test-get-model" in result_all.stdout @@ -232,11 +232,11 @@ def test_model_args_allowed_parameters(integration_setup): # Test valid parameters: tuple_format, batch_size, model_parameters valid_query = 'CREATE MODEL(\'valid-args-model\', \'gpt-4o\', \'openai\', \'{"tuple_format": "json", "batch_size": 5, "model_parameters": {"temperature": 0.8}}\');' - result = run_cli(duckdb_cli_path, db_path, valid_query) + result = run_cli(duckdb_cli_path, db_path, valid_query, with_secrets=False) assert result.returncode == 0 get_query = "GET MODEL 'valid-args-model';" - get_result = run_cli(duckdb_cli_path, db_path, get_query) + get_result = run_cli(duckdb_cli_path, db_path, get_query, with_secrets=False) assert "valid-args-model" in get_result.stdout @@ -248,12 +248,12 @@ def test_multiple_providers(integration_setup): azure_query = "CREATE MODEL('azure-model', 'gpt-4o', 'azure');" ollama_query = "CREATE MODEL('ollama-model', 'llama2', 'ollama');" - run_cli(duckdb_cli_path, db_path, openai_query) - 
run_cli(duckdb_cli_path, db_path, azure_query) - run_cli(duckdb_cli_path, db_path, ollama_query) + run_cli(duckdb_cli_path, db_path, openai_query, with_secrets=False) + run_cli(duckdb_cli_path, db_path, azure_query, with_secrets=False) + run_cli(duckdb_cli_path, db_path, ollama_query, with_secrets=False) get_all_query = "GET MODELS;" - result = run_cli(duckdb_cli_path, db_path, get_all_query) + result = run_cli(duckdb_cli_path, db_path, get_all_query, with_secrets=False) assert "openai-model" in result.stdout assert "azure-model" in result.stdout assert "ollama-model" in result.stdout @@ -266,9 +266,9 @@ def test_multiple_providers(integration_setup): def test_create_model_without_semicolon(integration_setup): duckdb_cli_path, db_path = integration_setup create_query = "CREATE MODEL('no-semicolon-model', 'gpt-4o', 'openai')" - run_cli(duckdb_cli_path, db_path, create_query) + run_cli(duckdb_cli_path, db_path, create_query, with_secrets=False) get_query = "GET MODEL 'no-semicolon-model';" - result = run_cli(duckdb_cli_path, db_path, get_query) + result = run_cli(duckdb_cli_path, db_path, get_query, with_secrets=False) assert "no-semicolon-model" in result.stdout @@ -277,9 +277,9 @@ def test_create_model_with_comment(integration_setup): create_query = ( "CREATE MODEL('comment-model', 'gpt-4o', 'openai'); -- This is a comment" ) - run_cli(duckdb_cli_path, db_path, create_query) + run_cli(duckdb_cli_path, db_path, create_query, with_secrets=False) get_query = "GET MODEL 'comment-model';" - result = run_cli(duckdb_cli_path, db_path, get_query) + result = run_cli(duckdb_cli_path, db_path, get_query, with_secrets=False) assert "comment-model" in result.stdout @@ -287,27 +287,27 @@ def test_create_model_with_comment_before(integration_setup): duckdb_cli_path, db_path = integration_setup create_query = """-- Create a test model CREATE MODEL('comment-before-model', 'gpt-4o', 'openai');""" - run_cli(duckdb_cli_path, db_path, create_query) + run_cli(duckdb_cli_path, db_path, 
create_query, with_secrets=False) get_query = "GET MODEL 'comment-before-model';" - result = run_cli(duckdb_cli_path, db_path, get_query) + result = run_cli(duckdb_cli_path, db_path, get_query, with_secrets=False) assert "comment-before-model" in result.stdout def test_delete_model_without_semicolon(integration_setup): duckdb_cli_path, db_path = integration_setup create_query = "CREATE MODEL('delete-no-semi', 'gpt-4o', 'openai');" - run_cli(duckdb_cli_path, db_path, create_query) + run_cli(duckdb_cli_path, db_path, create_query, with_secrets=False) delete_query = "DELETE MODEL 'delete-no-semi'" - run_cli(duckdb_cli_path, db_path, delete_query) + run_cli(duckdb_cli_path, db_path, delete_query, with_secrets=False) get_query = "GET MODEL 'delete-no-semi';" - result = run_cli(duckdb_cli_path, db_path, get_query) + result = run_cli(duckdb_cli_path, db_path, get_query, with_secrets=False) assert "delete-no-semi" not in result.stdout or result.stdout.strip() == "" def test_get_models_without_semicolon(integration_setup): duckdb_cli_path, db_path = integration_setup create_query = "CREATE MODEL('get-no-semi', 'gpt-4o', 'openai');" - run_cli(duckdb_cli_path, db_path, create_query) + run_cli(duckdb_cli_path, db_path, create_query, with_secrets=False) get_query = "GET MODELS" - result = run_cli(duckdb_cli_path, db_path, get_query) + result = run_cli(duckdb_cli_path, db_path, get_query, with_secrets=False) assert "get-no-semi" in result.stdout diff --git a/test/integration/src/integration/tests/prompt_parser/test_prompt_parser.py b/test/integration/src/integration/tests/prompt_parser/test_prompt_parser.py index b41d9783..5926a76e 100644 --- a/test/integration/src/integration/tests/prompt_parser/test_prompt_parser.py +++ b/test/integration/src/integration/tests/prompt_parser/test_prompt_parser.py @@ -4,9 +4,9 @@ def test_create_and_get_prompt(integration_setup): duckdb_cli_path, db_path = integration_setup create_query = "CREATE PROMPT('test-prompt', 'Test prompt content');" - 
run_cli(duckdb_cli_path, db_path, create_query) + run_cli(duckdb_cli_path, db_path, create_query, with_secrets=False) get_query = "GET PROMPT 'test-prompt';" - result = run_cli(duckdb_cli_path, db_path, get_query) + result = run_cli(duckdb_cli_path, db_path, get_query, with_secrets=False) assert "test-prompt" in result.stdout assert "Test prompt content" in result.stdout assert "local" in result.stdout @@ -15,22 +15,22 @@ def test_create_and_get_prompt(integration_setup): def test_create_get_delete_global_prompt(integration_setup): duckdb_cli_path, db_path = integration_setup create_query = "CREATE GLOBAL PROMPT('global-test-prompt', 'Global test content');" - run_cli(duckdb_cli_path, db_path, create_query) + run_cli(duckdb_cli_path, db_path, create_query, with_secrets=False) get_query = "GET PROMPT 'global-test-prompt';" - result = run_cli(duckdb_cli_path, db_path, get_query) + result = run_cli(duckdb_cli_path, db_path, get_query, with_secrets=False) assert "global-test-prompt" in result.stdout assert "Global test content" in result.stdout assert "global" in result.stdout delete_query = "DELETE PROMPT 'global-test-prompt';" - run_cli(duckdb_cli_path, db_path, delete_query) + run_cli(duckdb_cli_path, db_path, delete_query, with_secrets=False) def test_create_local_prompt_explicit(integration_setup): duckdb_cli_path, db_path = integration_setup create_query = "CREATE LOCAL PROMPT('local-test-prompt', 'Local test content');" - run_cli(duckdb_cli_path, db_path, create_query) + run_cli(duckdb_cli_path, db_path, create_query, with_secrets=False) get_query = "GET PROMPT 'local-test-prompt';" - result = run_cli(duckdb_cli_path, db_path, get_query) + result = run_cli(duckdb_cli_path, db_path, get_query, with_secrets=False) assert "local-test-prompt" in result.stdout assert "Local test content" in result.stdout assert "local" in result.stdout @@ -39,22 +39,22 @@ def test_create_local_prompt_explicit(integration_setup): def test_delete_prompt(integration_setup): 
duckdb_cli_path, db_path = integration_setup create_query = "CREATE PROMPT('delete-test-prompt', 'To be deleted');" - run_cli(duckdb_cli_path, db_path, create_query) + run_cli(duckdb_cli_path, db_path, create_query, with_secrets=False) delete_query = "DELETE PROMPT 'delete-test-prompt';" - run_cli(duckdb_cli_path, db_path, delete_query) + run_cli(duckdb_cli_path, db_path, delete_query, with_secrets=False) get_query = "GET PROMPT 'delete-test-prompt';" - result = run_cli(duckdb_cli_path, db_path, get_query) + result = run_cli(duckdb_cli_path, db_path, get_query, with_secrets=False) assert "delete-test-prompt" not in result.stdout or result.stdout.strip() == "" def test_update_prompt_content(integration_setup): duckdb_cli_path, db_path = integration_setup create_query = "CREATE PROMPT('update-test-prompt', 'Original content');" - run_cli(duckdb_cli_path, db_path, create_query) + run_cli(duckdb_cli_path, db_path, create_query, with_secrets=False) update_query = "UPDATE PROMPT('update-test-prompt', 'Updated content');" - run_cli(duckdb_cli_path, db_path, update_query) + run_cli(duckdb_cli_path, db_path, update_query, with_secrets=False) get_query = "GET PROMPT 'update-test-prompt';" - result = run_cli(duckdb_cli_path, db_path, get_query) + result = run_cli(duckdb_cli_path, db_path, get_query, with_secrets=False) assert "update-test-prompt" in result.stdout assert "Updated content" in result.stdout @@ -62,25 +62,25 @@ def test_update_prompt_content(integration_setup): def test_update_prompt_scope_to_global(integration_setup): duckdb_cli_path, db_path = integration_setup create_query = "CREATE LOCAL PROMPT('scope-test-prompt', 'Test content');" - run_cli(duckdb_cli_path, db_path, create_query) + run_cli(duckdb_cli_path, db_path, create_query, with_secrets=False) update_query = "UPDATE PROMPT 'scope-test-prompt' TO GLOBAL;" - run_cli(duckdb_cli_path, db_path, update_query) + run_cli(duckdb_cli_path, db_path, update_query, with_secrets=False) get_query = "GET PROMPT 
'scope-test-prompt';" - result = run_cli(duckdb_cli_path, db_path, get_query) + result = run_cli(duckdb_cli_path, db_path, get_query, with_secrets=False) assert "scope-test-prompt" in result.stdout assert "global" in result.stdout delete_query = "DELETE PROMPT 'scope-test-prompt';" - run_cli(duckdb_cli_path, db_path, delete_query) + run_cli(duckdb_cli_path, db_path, delete_query, with_secrets=False) def test_update_prompt_scope_to_local(integration_setup): duckdb_cli_path, db_path = integration_setup create_query = "CREATE GLOBAL PROMPT('scope-test-prompt-2', 'Test content');" - run_cli(duckdb_cli_path, db_path, create_query) + run_cli(duckdb_cli_path, db_path, create_query, with_secrets=False) update_query = "UPDATE PROMPT 'scope-test-prompt-2' TO LOCAL;" - run_cli(duckdb_cli_path, db_path, update_query) + run_cli(duckdb_cli_path, db_path, update_query, with_secrets=False) get_query = "GET PROMPT 'scope-test-prompt-2';" - result = run_cli(duckdb_cli_path, db_path, get_query) + result = run_cli(duckdb_cli_path, db_path, get_query, with_secrets=False) assert "scope-test-prompt-2" in result.stdout assert "local" in result.stdout @@ -89,58 +89,58 @@ def test_get_all_prompts(integration_setup): duckdb_cli_path, db_path = integration_setup create_query1 = "CREATE PROMPT('prompt1', 'Content 1');" create_query2 = "CREATE GLOBAL PROMPT('prompt2', 'Content 2');" - run_cli(duckdb_cli_path, db_path, create_query1) - run_cli(duckdb_cli_path, db_path, create_query2) + run_cli(duckdb_cli_path, db_path, create_query1, with_secrets=False) + run_cli(duckdb_cli_path, db_path, create_query2, with_secrets=False) get_all_query = "GET PROMPTS;" - result = run_cli(duckdb_cli_path, db_path, get_all_query) + result = run_cli(duckdb_cli_path, db_path, get_all_query, with_secrets=False) assert "prompt1" in result.stdout assert "prompt2" in result.stdout assert "Content 1" in result.stdout assert "Content 2" in result.stdout delete_query = "DELETE PROMPT 'prompt2';" - run_cli(duckdb_cli_path, 
db_path, delete_query) + run_cli(duckdb_cli_path, db_path, delete_query, with_secrets=False) def test_create_prompt_duplicate_error(integration_setup): duckdb_cli_path, db_path = integration_setup create_query = "CREATE PROMPT('duplicate-prompt', 'Original');" - run_cli(duckdb_cli_path, db_path, create_query) + run_cli(duckdb_cli_path, db_path, create_query, with_secrets=False) duplicate_query = "CREATE PROMPT('duplicate-prompt', 'Duplicate');" - result = run_cli(duckdb_cli_path, db_path, duplicate_query) + result = run_cli(duckdb_cli_path, db_path, duplicate_query, with_secrets=False) assert result.returncode != 0 or "already exist" in result.stderr def test_create_prompt_invalid_syntax(integration_setup): duckdb_cli_path, db_path = integration_setup invalid_query1 = "CREATE PROMPT 'test', 'content');" - result1 = run_cli(duckdb_cli_path, db_path, invalid_query1) + result1 = run_cli(duckdb_cli_path, db_path, invalid_query1, with_secrets=False) assert result1.returncode != 0 invalid_query2 = "CREATE PROMPT('test' 'content');" - result2 = run_cli(duckdb_cli_path, db_path, invalid_query2) + result2 = run_cli(duckdb_cli_path, db_path, invalid_query2, with_secrets=False) assert result2.returncode != 0 invalid_query3 = "CREATE PROMPT('test', 'content';" - result3 = run_cli(duckdb_cli_path, db_path, invalid_query3) + result3 = run_cli(duckdb_cli_path, db_path, invalid_query3, with_secrets=False) assert result3.returncode != 0 def test_delete_nonexistent_prompt(integration_setup): duckdb_cli_path, db_path = integration_setup delete_query = "DELETE PROMPT 'nonexistent-prompt';" - result = run_cli(duckdb_cli_path, db_path, delete_query) + result = run_cli(duckdb_cli_path, db_path, delete_query, with_secrets=False) assert result.returncode == 0 def test_update_nonexistent_prompt_error(integration_setup): duckdb_cli_path, db_path = integration_setup update_query = "UPDATE PROMPT('nonexistent-prompt', 'New content');" - result = run_cli(duckdb_cli_path, db_path, update_query) 
+ result = run_cli(duckdb_cli_path, db_path, update_query, with_secrets=False) assert result.returncode != 0 or "doesn't exist" in result.stderr def test_get_nonexistent_prompt(integration_setup): duckdb_cli_path, db_path = integration_setup get_query = "GET PROMPT 'nonexistent-prompt';" - result = run_cli(duckdb_cli_path, db_path, get_query) + result = run_cli(duckdb_cli_path, db_path, get_query, with_secrets=False) assert result.returncode == 0 assert "nonexistent-prompt" not in result.stdout or result.stdout.strip() == "" @@ -148,14 +148,14 @@ def test_get_nonexistent_prompt(integration_setup): def test_empty_prompt_name_error(integration_setup): duckdb_cli_path, db_path = integration_setup invalid_query = "CREATE PROMPT('', 'content');" - result = run_cli(duckdb_cli_path, db_path, invalid_query) + result = run_cli(duckdb_cli_path, db_path, invalid_query, with_secrets=False) assert result.returncode != 0 def test_empty_prompt_content_error(integration_setup): duckdb_cli_path, db_path = integration_setup invalid_query = "CREATE PROMPT('test', '');" - result = run_cli(duckdb_cli_path, db_path, invalid_query) + result = run_cli(duckdb_cli_path, db_path, invalid_query, with_secrets=False) assert result.returncode != 0 @@ -163,9 +163,9 @@ def test_empty_prompt_content_error(integration_setup): def test_create_prompt_without_semicolon(integration_setup): duckdb_cli_path, db_path = integration_setup create_query = "CREATE PROMPT('no-semi-prompt', 'Test content')" - run_cli(duckdb_cli_path, db_path, create_query) + run_cli(duckdb_cli_path, db_path, create_query, with_secrets=False) get_query = "GET PROMPT 'no-semi-prompt';" - result = run_cli(duckdb_cli_path, db_path, get_query) + result = run_cli(duckdb_cli_path, db_path, get_query, with_secrets=False) assert "no-semi-prompt" in result.stdout @@ -174,9 +174,9 @@ def test_create_prompt_with_comment(integration_setup): create_query = ( "CREATE PROMPT('comment-prompt', 'Test content'); -- This is a comment" ) - 
run_cli(duckdb_cli_path, db_path, create_query) + run_cli(duckdb_cli_path, db_path, create_query, with_secrets=False) get_query = "GET PROMPT 'comment-prompt';" - result = run_cli(duckdb_cli_path, db_path, get_query) + result = run_cli(duckdb_cli_path, db_path, get_query, with_secrets=False) assert "comment-prompt" in result.stdout @@ -184,27 +184,27 @@ def test_create_prompt_with_comment_before(integration_setup): duckdb_cli_path, db_path = integration_setup create_query = """-- Create a test prompt CREATE PROMPT('comment-before-prompt', 'Test content');""" - run_cli(duckdb_cli_path, db_path, create_query) + run_cli(duckdb_cli_path, db_path, create_query, with_secrets=False) get_query = "GET PROMPT 'comment-before-prompt';" - result = run_cli(duckdb_cli_path, db_path, get_query) + result = run_cli(duckdb_cli_path, db_path, get_query, with_secrets=False) assert "comment-before-prompt" in result.stdout def test_delete_prompt_without_semicolon(integration_setup): duckdb_cli_path, db_path = integration_setup create_query = "CREATE PROMPT('delete-no-semi', 'Test content');" - run_cli(duckdb_cli_path, db_path, create_query) + run_cli(duckdb_cli_path, db_path, create_query, with_secrets=False) delete_query = "DELETE PROMPT 'delete-no-semi'" - run_cli(duckdb_cli_path, db_path, delete_query) + run_cli(duckdb_cli_path, db_path, delete_query, with_secrets=False) get_query = "GET PROMPT 'delete-no-semi';" - result = run_cli(duckdb_cli_path, db_path, get_query) + result = run_cli(duckdb_cli_path, db_path, get_query, with_secrets=False) assert "delete-no-semi" not in result.stdout or result.stdout.strip() == "" def test_get_prompts_without_semicolon(integration_setup): duckdb_cli_path, db_path = integration_setup create_query = "CREATE PROMPT('get-no-semi', 'Test content');" - run_cli(duckdb_cli_path, db_path, create_query) + run_cli(duckdb_cli_path, db_path, create_query, with_secrets=False) get_query = "GET PROMPTS" - result = run_cli(duckdb_cli_path, db_path, get_query) + 
result = run_cli(duckdb_cli_path, db_path, get_query, with_secrets=False) assert "get-no-semi" in result.stdout diff --git a/test/integration/src/integration/tests/secret_manager/test_secret_manager.py b/test/integration/src/integration/tests/secret_manager/test_secret_manager.py index 4cb1f97c..73c3e06d 100644 --- a/test/integration/src/integration/tests/secret_manager/test_secret_manager.py +++ b/test/integration/src/integration/tests/secret_manager/test_secret_manager.py @@ -8,7 +8,7 @@ def test_create_openai_secret(integration_setup): create_query = ( f"CREATE SECRET {secret_name} (TYPE OPENAI, API_KEY 'test-api-key-123');" ) - result = run_cli(duckdb_cli_path, db_path, create_query) + result = run_cli(duckdb_cli_path, db_path, create_query, with_secrets=False) assert result.returncode == 0 @@ -16,7 +16,7 @@ def test_create_openai_secret_with_base_url(integration_setup): duckdb_cli_path, db_path = integration_setup secret_name = "test_openai_secret_with_url" create_query = f"CREATE SECRET {secret_name} (TYPE OPENAI, API_KEY 'test-api-key-123', BASE_URL 'https://api.custom.com');" - result = run_cli(duckdb_cli_path, db_path, create_query) + result = run_cli(duckdb_cli_path, db_path, create_query, with_secrets=False) assert result.returncode == 0 @@ -24,7 +24,7 @@ def test_create_azure_secret(integration_setup): duckdb_cli_path, db_path = integration_setup secret_name = "test_azure_secret" create_query = f"CREATE SECRET {secret_name} (TYPE AZURE_LLM, API_KEY 'test-azure-key', RESOURCE_NAME 'test-resource', API_VERSION '2023-05-15');" - result = run_cli(duckdb_cli_path, db_path, create_query) + result = run_cli(duckdb_cli_path, db_path, create_query, with_secrets=False) assert result.returncode == 0 @@ -34,7 +34,7 @@ def test_create_ollama_secret(integration_setup): create_query = ( f"CREATE SECRET {secret_name} (TYPE OLLAMA, API_URL 'http://localhost:11434');" ) - result = run_cli(duckdb_cli_path, db_path, create_query) + result = run_cli(duckdb_cli_path, 
db_path, create_query, with_secrets=False) assert result.returncode == 0 @@ -44,7 +44,7 @@ def test_create_openai_secret_missing_required_field(integration_setup): create_query = ( f"CREATE SECRET {secret_name} (TYPE OPENAI, BASE_URL 'https://api.openai.com');" ) - result = run_cli(duckdb_cli_path, db_path, create_query) + result = run_cli(duckdb_cli_path, db_path, create_query, with_secrets=False) assert result.returncode != 0 @@ -52,13 +52,13 @@ def test_create_azure_secret_missing_required_fields(integration_setup): duckdb_cli_path, db_path = integration_setup secret_name = "test_azure_invalid" create_query = f"CREATE SECRET {secret_name} (TYPE AZURE_LLM, RESOURCE_NAME 'test-resource', API_VERSION '2023-05-15');" - result = run_cli(duckdb_cli_path, db_path, create_query) + result = run_cli(duckdb_cli_path, db_path, create_query, with_secrets=False) assert result.returncode != 0 create_query = f"CREATE SECRET {secret_name} (TYPE AZURE_LLM, API_KEY 'test-key', API_VERSION '2023-05-15');" - result = run_cli(duckdb_cli_path, db_path, create_query) + result = run_cli(duckdb_cli_path, db_path, create_query, with_secrets=False) assert result.returncode != 0 create_query = f"CREATE SECRET {secret_name} (TYPE AZURE_LLM, API_KEY 'test-key', RESOURCE_NAME 'test-resource');" - result = run_cli(duckdb_cli_path, db_path, create_query) + result = run_cli(duckdb_cli_path, db_path, create_query, with_secrets=False) assert result.returncode != 0 @@ -66,7 +66,7 @@ def test_create_ollama_secret_missing_required_field(integration_setup): duckdb_cli_path, db_path = integration_setup secret_name = "test_ollama_invalid" create_query = f"CREATE SECRET {secret_name} (TYPE OLLAMA);" - result = run_cli(duckdb_cli_path, db_path, create_query) + result = run_cli(duckdb_cli_path, db_path, create_query, with_secrets=False) assert result.returncode != 0 @@ -76,14 +76,14 @@ def test_create_secret_with_unsupported_type(integration_setup): create_query = ( f"CREATE SECRET {secret_name} (TYPE 
UNSUPPORTED_TYPE, API_KEY 'test-key');" ) - result = run_cli(duckdb_cli_path, db_path, create_query) + result = run_cli(duckdb_cli_path, db_path, create_query, with_secrets=False) assert result.returncode != 0 def test_create_secret_empty_name(integration_setup): duckdb_cli_path, db_path = integration_setup create_query = "CREATE SECRET '' (TYPE OPENAI, API_KEY 'test-key');" - result = run_cli(duckdb_cli_path, db_path, create_query) + result = run_cli(duckdb_cli_path, db_path, create_query, with_secrets=False) assert result.returncode != 0 @@ -91,7 +91,7 @@ def test_create_secret_empty_api_key(integration_setup): duckdb_cli_path, db_path = integration_setup secret_name = "test_empty_key_secret" create_query = f"CREATE SECRET {secret_name} (TYPE OPENAI, API_KEY '');" - result = run_cli(duckdb_cli_path, db_path, create_query) + result = run_cli(duckdb_cli_path, db_path, create_query, with_secrets=False) assert result.returncode != 0 @@ -102,17 +102,17 @@ def test_multiple_secrets_different_types(integration_setup): create_openai = ( f"CREATE SECRET {openai_secret} (TYPE OPENAI, API_KEY 'openai-key');" ) - result = run_cli(duckdb_cli_path, db_path, create_openai) + result = run_cli(duckdb_cli_path, db_path, create_openai, with_secrets=False) assert result.returncode == 0 secrets.append(openai_secret) azure_secret = "test_multi_azure" create_azure = f"CREATE SECRET {azure_secret} (TYPE AZURE_LLM, API_KEY 'azure-key', RESOURCE_NAME 'resource', API_VERSION '2023-05-15');" - result = run_cli(duckdb_cli_path, db_path, create_azure) + result = run_cli(duckdb_cli_path, db_path, create_azure, with_secrets=False) assert result.returncode == 0 secrets.append(azure_secret) ollama_secret = "test_multi_ollama" create_ollama = f"CREATE SECRET {ollama_secret} (TYPE OLLAMA, API_URL 'http://localhost:11434');" - result = run_cli(duckdb_cli_path, db_path, create_ollama) + result = run_cli(duckdb_cli_path, db_path, create_ollama, with_secrets=False) assert result.returncode == 0 
secrets.append(ollama_secret) @@ -121,7 +121,7 @@ def test_secret_scope_handling(integration_setup): duckdb_cli_path, db_path = integration_setup secret_name = "test_scope_secret" create_query = f"CREATE SECRET {secret_name} (TYPE OPENAI, API_KEY 'test-key');" - result = run_cli(duckdb_cli_path, db_path, create_query) + result = run_cli(duckdb_cli_path, db_path, create_query, with_secrets=False) assert result.returncode == 0 @@ -147,20 +147,20 @@ def test_create_secrets_parametrized(integration_setup, provider_type, required_ [f"{key} '{value}'" for key, value in required_fields.items()] ) create_query = f"CREATE SECRET {secret_name} (TYPE {provider_type}, {fields_str});" - result = run_cli(duckdb_cli_path, db_path, create_query) + result = run_cli(duckdb_cli_path, db_path, create_query, with_secrets=False) assert result.returncode == 0 def test_persistent_secret_lifecycle(integration_setup): duckdb_cli_path, db_path = integration_setup - secret_name = "test_openai_secret" - + secret_name = "test_persistent_lifecycle_secret" + create_query = f"CREATE PERSISTENT SECRET {secret_name} (TYPE OPENAI, API_KEY 'test-persistent-key');" - result = run_cli(duckdb_cli_path, db_path, create_query) + result = run_cli(duckdb_cli_path, db_path, create_query, with_secrets=False) assert result.returncode == 0 check_query = f"SELECT name, type, persistent FROM duckdb_secrets() WHERE name = '{secret_name}';" - result = run_cli(duckdb_cli_path, db_path, check_query) + result = run_cli(duckdb_cli_path, db_path, check_query, with_secrets=False) assert secret_name in result.stdout assert "OPENAI" in result.stdout or "openai" in result.stdout assert ( @@ -170,9 +170,9 @@ def test_persistent_secret_lifecycle(integration_setup): ) drop_query = f"DROP PERSISTENT SECRET {secret_name};" - result = run_cli(duckdb_cli_path, db_path, drop_query) + result = run_cli(duckdb_cli_path, db_path, drop_query, with_secrets=False) assert result.returncode == 0 check_query = f"SELECT name FROM 
duckdb_secrets() WHERE name = '{secret_name}';" - result = run_cli(duckdb_cli_path, db_path, check_query) + result = run_cli(duckdb_cli_path, db_path, check_query, with_secrets=False) assert secret_name not in result.stdout diff --git a/test/unit/functions/aggregate/llm_first.cpp b/test/unit/functions/aggregate/llm_first.cpp index 5e86638c..e9f8132d 100644 --- a/test/unit/functions/aggregate/llm_first.cpp +++ b/test/unit/functions/aggregate/llm_first.cpp @@ -5,13 +5,11 @@ namespace flock { class LLMFirstTest : public LLMAggregateTestBase { protected: - // The LLM response (for mocking) static constexpr const char* LLM_RESPONSE = R"({"items":[0]})"; - // The expected function output (selected data) - static constexpr const char* EXPECTED_RESPONSE = R"([{"data":["High-performance running shoes with advanced cushioning"]}])"; + static constexpr const char* EXPECTED_RESPONSE_SINGLE = R"([{"data":["High-performance running shoes with advanced cushioning"]}])"; std::string GetExpectedResponse() const override { - return EXPECTED_RESPONSE; + return EXPECTED_RESPONSE_SINGLE; } nlohmann::json GetExpectedJsonResponse() const override { @@ -39,8 +37,30 @@ class LLMFirstTest : public LLMAggregateTestBase { } }; -// Test llm_first with SQL queries without GROUP BY - new API -TEST_F(LLMFirstTest, LLMFirstWithoutGroupBy) { +// Test 1-tuple case: no LLM call needed, returns the single tuple directly +TEST_F(LLMFirstTest, SingleTupleNoLLMCall) { + // No mock expectations - LLM should NOT be called for single tuple + auto con = Config::GetConnection(); + + const auto results = con.Query( + "SELECT llm_first(" + "{'model_name': 'gpt-4o'}, " + "{'prompt': 'Select the first product', 'context_columns': [{'data': description}]}" + ") AS first_product FROM VALUES " + "('High-performance running shoes with advanced cushioning') AS products(description);"); + + ASSERT_FALSE(results->HasError()) << "Query failed: " << results->GetError(); + ASSERT_EQ(results->RowCount(), 1); + + 
nlohmann::json parsed = nlohmann::json::parse(results->GetValue(0, 0).GetValue()); + EXPECT_EQ(parsed.size(), 1); + EXPECT_TRUE(parsed[0].contains("data")); + EXPECT_EQ(parsed[0]["data"].size(), 1); + EXPECT_EQ(parsed[0]["data"][0], "High-performance running shoes with advanced cushioning"); +} + +// Test multiple tuples without GROUP BY: LLM is called once +TEST_F(LLMFirstTest, MultipleTuplesWithoutGroupBy) { EXPECT_CALL(*mock_provider, AddCompletionRequest(::testing::_, ::testing::_, ::testing::_, ::testing::_)) .Times(1); EXPECT_CALL(*mock_provider, CollectCompletions(::testing::_)) @@ -49,39 +69,42 @@ TEST_F(LLMFirstTest, LLMFirstWithoutGroupBy) { auto con = Config::GetConnection(); const auto results = con.Query( - "SELECT " + GetFunctionName() + "(" - "{'model_name': 'gpt-4o'}, " - "{'prompt': 'What is the most relevant detail for these products, based on their names and descriptions?', 'context_columns': [{'data': description}]}" - ") AS first_product_feature FROM VALUES " - "('High-performance running shoes with advanced cushioning'), " - "('Wireless noise-cancelling headphones for immersive audio'), " - "('Smart fitness tracker with heart rate monitoring') AS products(description);"); + "SELECT llm_first(" + "{'model_name': 'gpt-4o'}, " + "{'prompt': 'What is the most relevant product?', 'context_columns': [{'data': description}]}" + ") AS first_product FROM VALUES " + "('High-performance running shoes with advanced cushioning'), " + "('Wireless noise-cancelling headphones for immersive audio'), " + "('Smart fitness tracker with heart rate monitoring') AS products(description);"); + ASSERT_FALSE(results->HasError()) << "Query failed: " << results->GetError(); ASSERT_EQ(results->RowCount(), 1); ASSERT_EQ(results->GetValue(0, 0).GetValue(), GetExpectedResponse()); } -// Test llm_first with SQL queries with GROUP BY - new API -TEST_F(LLMFirstTest, LLMFirstWithGroupBy) { +// Test GROUP BY with multiple tuples per group: LLM is called for each group 
+TEST_F(LLMFirstTest, GroupByWithMultipleTuplesPerGroup) { EXPECT_CALL(*mock_provider, AddCompletionRequest(::testing::_, ::testing::_, ::testing::_, ::testing::_)) - .Times(3); + .Times(2); EXPECT_CALL(*mock_provider, CollectCompletions(::testing::_)) - .Times(3) + .Times(2) .WillRepeatedly(::testing::Return(std::vector{GetExpectedJsonResponse()})); auto con = Config::GetConnection(); const auto results = con.Query( - "SELECT category, " + GetFunctionName() + "(" - "{'model_name': 'gpt-4o'}, " - "{'prompt': 'What is the most relevant detail for these products, based on their names and descriptions?', 'context_columns': [{'data': description}]}" - ") AS first_feature FROM VALUES " - "('electronics', 'High-performance running shoes with advanced cushioning'), " - "('audio', 'Wireless noise-cancelling headphones for immersive audio'), " - "('fitness', 'Smart fitness tracker with heart rate monitoring') " - "AS products(category, description) GROUP BY category;"); + "SELECT category, llm_first(" + "{'model_name': 'gpt-4o'}, " + "{'prompt': 'Select the most relevant product', 'context_columns': [{'data': description}]}" + ") AS first_product FROM VALUES " + "('footwear', 'Running shoes with cushioning'), " + "('footwear', 'Business shoes for professionals'), " + "('electronics', 'Wireless headphones'), " + "('electronics', 'Smart fitness tracker') " + "AS products(category, description) GROUP BY category;"); - ASSERT_EQ(results->RowCount(), 3); + ASSERT_FALSE(results->HasError()) << "Query failed: " << results->GetError(); + ASSERT_EQ(results->RowCount(), 2); for (idx_t i = 0; i < results->RowCount(); i++) { EXPECT_NO_THROW({ nlohmann::json parsed = nlohmann::json::parse(results->GetValue(1, i).GetValue()); @@ -90,88 +113,53 @@ TEST_F(LLMFirstTest, LLMFirstWithGroupBy) { } } -// Test argument validation -TEST_F(LLMFirstTest, ValidateArguments) { - TestValidateArguments(); -} - -// Test operation with invalid arguments -TEST_F(LLMFirstTest, 
Operation_InvalidArguments_ThrowsException) { - TestOperationInvalidArguments(); -} - -// Test operation with multiple input scenarios - new API -TEST_F(LLMFirstTest, Operation_MultipleInputs_ProcessesCorrectly) { - const nlohmann::json expected_response = GetExpectedJsonResponse(); - - EXPECT_CALL(*mock_provider, AddCompletionRequest(::testing::_, ::testing::_, ::testing::_, ::testing::_)) - .Times(3); - EXPECT_CALL(*mock_provider, CollectCompletions(::testing::_)) - .Times(3) - .WillRepeatedly(::testing::Return(std::vector{expected_response})); - +// Test GROUP BY with single tuple per group: no LLM calls needed +TEST_F(LLMFirstTest, GroupByWithSingleTuplePerGroup) { + // No mock expectations - LLM should NOT be called when each group has only 1 tuple auto con = Config::GetConnection(); const auto results = con.Query( - "SELECT category, " + GetFunctionName() + "(" - "{'model_name': 'gpt-4o'}, " - "{'prompt': 'What is the most relevant product information?', 'context_columns': [{'data': description}]}" - ") AS first_relevant_info FROM VALUES " - "('electronics', 'High-performance running shoes with advanced cushioning'), " - "('audio', 'Wireless noise-cancelling headphones for immersive audio'), " - "('fitness', 'Smart fitness tracker with heart rate monitoring') " - "AS products(category, description) GROUP BY category;"); + "SELECT category, llm_first(" + "{'model_name': 'gpt-4o'}, " + "{'prompt': 'Select the most relevant product', 'context_columns': [{'data': description}]}" + ") AS first_product FROM VALUES " + "('footwear', 'Running shoes with cushioning'), " + "('electronics', 'Wireless headphones'), " + "('fitness', 'Smart fitness tracker') " + "AS products(category, description) GROUP BY category;"); + ASSERT_FALSE(results->HasError()) << "Query failed: " << results->GetError(); ASSERT_EQ(results->RowCount(), 3); for (idx_t i = 0; i < results->RowCount(); i++) { EXPECT_NO_THROW({ nlohmann::json parsed = nlohmann::json::parse(results->GetValue(1, 
i).GetValue()); EXPECT_TRUE(parsed[0].contains("data")); + EXPECT_EQ(parsed[0]["data"].size(), 1); }); } } -// Test large input set processing - new API -TEST_F(LLMFirstTest, Operation_LargeInputSet_ProcessesCorrectly) { - constexpr size_t input_count = 100; - const nlohmann::json expected_response = PrepareExpectedResponseForLargeInput(input_count); - - EXPECT_CALL(*mock_provider, AddCompletionRequest(::testing::_, ::testing::_, ::testing::_, ::testing::_)) - .Times(100); - EXPECT_CALL(*mock_provider, CollectCompletions(::testing::_)) - .Times(100) - .WillRepeatedly(::testing::Return(std::vector{expected_response})); - - auto con = Config::GetConnection(); - - const auto results = con.Query( - "SELECT id, " + GetFunctionName() + "(" - "{'model_name': 'gpt-4o'}, " - "{'prompt': 'Select the first relevant product based on relevance', 'context_columns': [{'data': 'Product description ' || id::TEXT}]}" - ") AS first_relevant FROM range(" + - std::to_string(input_count) + ") AS t(id) GROUP BY id;"); +// Test argument validation +TEST_F(LLMFirstTest, ValidateArguments) { + TestValidateArguments(); +} - ASSERT_EQ(results->RowCount(), 100); - for (idx_t i = 0; i < results->RowCount(); i++) { - EXPECT_NO_THROW({ - nlohmann::json parsed = nlohmann::json::parse(results->GetValue(1, i).GetValue()); - EXPECT_TRUE(parsed[0].contains("data")); - }); - } +// Test operation with invalid arguments +TEST_F(LLMFirstTest, InvalidArguments) { + TestOperationInvalidArguments(); } -// Test llm_first with audio transcription -TEST_F(LLMFirstTest, LLMFirstWithAudioTranscription) { - const nlohmann::json expected_transcription = "{\"text\": \"First audio candidate\"}"; +// Test with audio transcription +TEST_F(LLMFirstTest, AudioTranscription) { + const nlohmann::json expected_transcription1 = nlohmann::json::parse(R"({"text": "First audio candidate"})"); + const nlohmann::json expected_transcription2 = nlohmann::json::parse(R"({"text": "Second audio candidate"})"); const nlohmann::json 
expected_complete_response = GetExpectedJsonResponse(); - // Mock transcription model EXPECT_CALL(*mock_provider, AddTranscriptionRequest(::testing::_)) .Times(1); EXPECT_CALL(*mock_provider, CollectTranscriptions("multipart/form-data")) - .WillOnce(::testing::Return(std::vector{expected_transcription})); + .WillOnce(::testing::Return(std::vector{expected_transcription1, expected_transcription2})); - // Mock completion model EXPECT_CALL(*mock_provider, AddCompletionRequest(::testing::_, ::testing::_, ::testing::_, ::testing::_)) .Times(1); EXPECT_CALL(*mock_provider, CollectCompletions(::testing::_)) @@ -181,36 +169,37 @@ TEST_F(LLMFirstTest, LLMFirstWithAudioTranscription) { const auto results = con.Query( "SELECT llm_first(" "{'model_name': 'gpt-4o'}, " - "{'prompt': 'Select the best audio candidate. Return ID 0.', " + "{'prompt': 'Select the best audio candidate', " "'context_columns': [" "{'data': audio_url, " "'type': 'audio', " "'transcription_model': 'gpt-4o-transcribe'}" - "]}) AS result FROM VALUES ('https://example.com/audio.mp3') AS tbl(audio_url);"); + "]}) AS result FROM VALUES " + "('https://example.com/audio1.mp3'), " + "('https://example.com/audio2.mp3') AS tbl(audio_url);"); ASSERT_FALSE(results->HasError()) << "Query failed: " << results->GetError(); ASSERT_EQ(results->RowCount(), 1); } // Test audio transcription error handling for Ollama -TEST_F(LLMFirstTest, LLMFirstAudioTranscriptionOllamaError) { +TEST_F(LLMFirstTest, AudioTranscriptionOllamaError) { auto con = Config::GetConnection(); - // Mock transcription model to throw error (simulating Ollama behavior) EXPECT_CALL(*mock_provider, AddTranscriptionRequest(::testing::_)) .WillOnce(::testing::Throw(std::runtime_error("Audio transcription is not currently supported by Ollama."))); - // Test with Ollama which doesn't support transcription const auto results = con.Query( "SELECT llm_first(" - "{'model_name': 'llama3'}, " - "{'prompt': 'Select the best audio. 
Return ID 0.', " + "{'model_name': 'gemma3:4b'}, " + "{'prompt': 'Select the best audio', " "'context_columns': [" "{'data': audio_url, " "'type': 'audio', " - "'transcription_model': 'llama3'}" - "]}) AS result FROM VALUES ('https://example.com/audio.mp3') AS tbl(audio_url);"); + "'transcription_model': 'gemma3:4b'}" + "]}) AS result FROM VALUES " + "('https://example.com/audio1.mp3'), " + "('https://example.com/audio2.mp3') AS tbl(audio_url);"); - // Should fail because Ollama doesn't support transcription ASSERT_TRUE(results->HasError()); } diff --git a/test/unit/functions/aggregate/llm_last.cpp b/test/unit/functions/aggregate/llm_last.cpp index 988fd032..f3f515d8 100644 --- a/test/unit/functions/aggregate/llm_last.cpp +++ b/test/unit/functions/aggregate/llm_last.cpp @@ -5,13 +5,11 @@ namespace flock { class LLMLastTest : public LLMAggregateTestBase { protected: - // The LLM response (for mocking) - for llm_last, it should select the last index - static constexpr const char* LLM_RESPONSE = R"({"items":[0]})"; - // The expected function output (selected data from the last position) - static constexpr const char* EXPECTED_RESPONSE = R"([{"data":["High-performance running shoes with advanced cushioning"]}])"; + static constexpr const char* LLM_RESPONSE = R"({"items":[2]})"; + static constexpr const char* EXPECTED_RESPONSE_SINGLE = R"([{"data":["Smart fitness tracker with heart rate monitoring"]}])"; std::string GetExpectedResponse() const override { - return EXPECTED_RESPONSE; + return EXPECTED_RESPONSE_SINGLE; } nlohmann::json GetExpectedJsonResponse() const override { @@ -27,11 +25,11 @@ class LLMLastTest : public LLMAggregateTestBase { } nlohmann::json PrepareExpectedResponseForBatch(const std::vector& responses) const override { - return nlohmann::json{{"selected", static_cast(responses.size() - 1)}}; + return nlohmann::json{{"items", {static_cast(responses.size() - 1)}}}; } nlohmann::json PrepareExpectedResponseForLargeInput(size_t input_count) const override 
{ - return nlohmann::json{{"selected", static_cast(input_count - 1)}}; + return nlohmann::json{{"items", {static_cast(input_count - 1)}}}; } std::string FormatExpectedResult(const nlohmann::json& response) const override { @@ -39,90 +37,75 @@ class LLMLastTest : public LLMAggregateTestBase { } }; -// Test llm_last with SQL queries without GROUP BY - new API -TEST_F(LLMLastTest, LLMLastWithoutGroupBy) { - EXPECT_CALL(*mock_provider, AddCompletionRequest(::testing::_, ::testing::_, ::testing::_, ::testing::_)) - .Times(1); - EXPECT_CALL(*mock_provider, CollectCompletions(::testing::_)) - .WillOnce(::testing::Return(std::vector{GetExpectedJsonResponse()})); - +// Test 1-tuple case: no LLM call needed, returns the single tuple directly +TEST_F(LLMLastTest, SingleTupleNoLLMCall) { auto con = Config::GetConnection(); const auto results = con.Query( - "SELECT " + GetFunctionName() + "(" - "{'model_name': 'gpt-4o'}, " - "{'prompt': 'What is the least relevant detail for these products, based on their names and descriptions?', 'context_columns': [{'data': description}]}" - ") AS last_product_feature FROM VALUES " - "('High-performance running shoes with advanced cushioning'), " - "('Wireless noise-cancelling headphones for immersive audio'), " - "('Smart fitness tracker with heart rate monitoring') AS products(description);"); + "SELECT llm_last(" + "{'model_name': 'gpt-4o'}, " + "{'prompt': 'Select the last product', 'context_columns': [{'data': description}]}" + ") AS last_product FROM VALUES " + "('High-performance running shoes with advanced cushioning') AS products(description);"); + ASSERT_FALSE(results->HasError()) << "Query failed: " << results->GetError(); ASSERT_EQ(results->RowCount(), 1); - ASSERT_EQ(results->GetValue(0, 0).GetValue(), GetExpectedResponse()); + + nlohmann::json parsed = nlohmann::json::parse(results->GetValue(0, 0).GetValue()); + EXPECT_EQ(parsed.size(), 1); + EXPECT_TRUE(parsed[0].contains("data")); + EXPECT_EQ(parsed[0]["data"].size(), 1); + 
EXPECT_EQ(parsed[0]["data"][0], "High-performance running shoes with advanced cushioning"); } -// Test llm_last with SQL queries with GROUP BY - new API -TEST_F(LLMLastTest, LLMLastWithGroupBy) { +// Test multiple tuples without GROUP BY: LLM is called once +TEST_F(LLMLastTest, MultipleTuplesWithoutGroupBy) { EXPECT_CALL(*mock_provider, AddCompletionRequest(::testing::_, ::testing::_, ::testing::_, ::testing::_)) - .Times(3); + .Times(1); EXPECT_CALL(*mock_provider, CollectCompletions(::testing::_)) - .Times(3) - .WillRepeatedly(::testing::Return(std::vector{GetExpectedJsonResponse()})); + .WillOnce(::testing::Return(std::vector{GetExpectedJsonResponse()})); auto con = Config::GetConnection(); const auto results = con.Query( - "SELECT category, " + GetFunctionName() + "(" - "{'model_name': 'gpt-4o'}, " - "{'prompt': 'What is the least relevant detail for these products, based on their names and descriptions?', 'context_columns': [{'data': description}]}" - ") AS last_feature FROM VALUES " - "('electronics', 'High-performance running shoes with advanced cushioning'), " - "('audio', 'Wireless noise-cancelling headphones for immersive audio'), " - "('fitness', 'Smart fitness tracker with heart rate monitoring') " - "AS products(category, description) GROUP BY category;"); - - ASSERT_EQ(results->RowCount(), 3); - for (idx_t i = 0; i < results->RowCount(); i++) { - EXPECT_NO_THROW({ - nlohmann::json parsed = nlohmann::json::parse(results->GetValue(1, i).GetValue()); - EXPECT_TRUE(parsed[0].contains("data")); - }); - } -} - -// Test argument validation -TEST_F(LLMLastTest, ValidateArguments) { - TestValidateArguments(); -} + "SELECT llm_last(" + "{'model_name': 'gpt-4o'}, " + "{'prompt': 'What is the least relevant product?', 'context_columns': [{'data': description}]}" + ") AS last_product FROM VALUES " + "('High-performance running shoes with advanced cushioning'), " + "('Wireless noise-cancelling headphones for immersive audio'), " + "('Smart fitness tracker with 
heart rate monitoring') AS products(description);"); -// Test operation with invalid arguments -TEST_F(LLMLastTest, Operation_InvalidArguments_ThrowsException) { - TestOperationInvalidArguments(); + ASSERT_FALSE(results->HasError()) << "Query failed: " << results->GetError(); + ASSERT_EQ(results->RowCount(), 1); + ASSERT_EQ(results->GetValue(0, 0).GetValue(), GetExpectedResponse()); } -// Test operation with multiple input scenarios - new API -TEST_F(LLMLastTest, Operation_MultipleInputs_ProcessesCorrectly) { - const nlohmann::json expected_response = GetExpectedJsonResponse(); +// Test GROUP BY with multiple tuples per group: LLM is called for each group +TEST_F(LLMLastTest, GroupByWithMultipleTuplesPerGroup) { + nlohmann::json response_index_1 = nlohmann::json{{"items", {1}}}; EXPECT_CALL(*mock_provider, AddCompletionRequest(::testing::_, ::testing::_, ::testing::_, ::testing::_)) - .Times(3); + .Times(2); EXPECT_CALL(*mock_provider, CollectCompletions(::testing::_)) - .Times(3) - .WillRepeatedly(::testing::Return(std::vector{expected_response})); + .Times(2) + .WillRepeatedly(::testing::Return(std::vector{response_index_1})); auto con = Config::GetConnection(); const auto results = con.Query( - "SELECT category, " + GetFunctionName() + "(" - "{'model_name': 'gpt-4o'}, " - "{'prompt': 'What is the least relevant product information?', 'context_columns': [{'data': description}]}" - ") AS last_relevant_info FROM VALUES " - "('electronics', 'High-performance running shoes with advanced cushioning'), " - "('audio', 'Wireless noise-cancelling headphones for immersive audio'), " - "('fitness', 'Smart fitness tracker with heart rate monitoring') " - "AS products(category, description) GROUP BY category;"); + "SELECT category, llm_last(" + "{'model_name': 'gpt-4o'}, " + "{'prompt': 'Select the least relevant product', 'context_columns': [{'data': description}]}" + ") AS last_product FROM VALUES " + "('footwear', 'Running shoes with cushioning'), " + "('footwear', 
'Business shoes for professionals'), " + "('electronics', 'Wireless headphones'), " + "('electronics', 'Smart fitness tracker') " + "AS products(category, description) GROUP BY category;"); - ASSERT_EQ(results->RowCount(), 3); + ASSERT_FALSE(results->HasError()) << "Query failed: " << results->GetError(); + ASSERT_EQ(results->RowCount(), 2); for (idx_t i = 0; i < results->RowCount(); i++) { EXPECT_NO_THROW({ nlohmann::json parsed = nlohmann::json::parse(results->GetValue(1, i).GetValue()); @@ -131,86 +114,92 @@ TEST_F(LLMLastTest, Operation_MultipleInputs_ProcessesCorrectly) { } } -// Test large input set processing - new API -TEST_F(LLMLastTest, Operation_LargeInputSet_ProcessesCorrectly) { - constexpr size_t input_count = 100; - const nlohmann::json expected_response = GetExpectedJsonResponse(); - - EXPECT_CALL(*mock_provider, AddCompletionRequest(::testing::_, ::testing::_, ::testing::_, ::testing::_)) - .Times(100); - EXPECT_CALL(*mock_provider, CollectCompletions(::testing::_)) - .Times(100) - .WillRepeatedly(::testing::Return(std::vector{expected_response})); - +// Test GROUP BY with single tuple per group: no LLM calls needed +TEST_F(LLMLastTest, GroupByWithSingleTuplePerGroup) { auto con = Config::GetConnection(); const auto results = con.Query( - "SELECT id, " + GetFunctionName() + "(" - "{'model_name': 'gpt-4o'}, " - "{'prompt': 'Select the last relevant product based on relevance', 'context_columns': [{'data': 'Product description ' || id::TEXT}]}" - ") AS last_relevant FROM range(" + - std::to_string(input_count) + ") AS t(id) GROUP BY id;"); + "SELECT category, llm_last(" + "{'model_name': 'gpt-4o'}, " + "{'prompt': 'Select the least relevant product', 'context_columns': [{'data': description}]}" + ") AS last_product FROM VALUES " + "('footwear', 'Running shoes with cushioning'), " + "('electronics', 'Wireless headphones'), " + "('fitness', 'Smart fitness tracker') " + "AS products(category, description) GROUP BY category;"); - 
ASSERT_EQ(results->RowCount(), 100); + ASSERT_FALSE(results->HasError()) << "Query failed: " << results->GetError(); + ASSERT_EQ(results->RowCount(), 3); for (idx_t i = 0; i < results->RowCount(); i++) { EXPECT_NO_THROW({ nlohmann::json parsed = nlohmann::json::parse(results->GetValue(1, i).GetValue()); EXPECT_TRUE(parsed[0].contains("data")); + EXPECT_EQ(parsed[0]["data"].size(), 1); }); } } -// Test llm_last with audio transcription -TEST_F(LLMLastTest, LLMLastWithAudioTranscription) { - const nlohmann::json expected_transcription = "{\"text\": \"Last audio candidate\"}"; - const nlohmann::json expected_complete_response = GetExpectedJsonResponse(); +// Test argument validation +TEST_F(LLMLastTest, ValidateArguments) { + TestValidateArguments(); +} + +// Test operation with invalid arguments +TEST_F(LLMLastTest, InvalidArguments) { + TestOperationInvalidArguments(); +} + +// Test with audio transcription +TEST_F(LLMLastTest, AudioTranscription) { + const nlohmann::json expected_transcription1 = nlohmann::json::parse(R"({"text": "First audio candidate"})"); + const nlohmann::json expected_transcription2 = nlohmann::json::parse(R"({"text": "Last audio candidate"})"); + nlohmann::json response_index_1 = nlohmann::json{{"items", {1}}}; - // Mock transcription model EXPECT_CALL(*mock_provider, AddTranscriptionRequest(::testing::_)) .Times(1); EXPECT_CALL(*mock_provider, CollectTranscriptions("multipart/form-data")) - .WillOnce(::testing::Return(std::vector{expected_transcription})); + .WillOnce(::testing::Return(std::vector{expected_transcription1, expected_transcription2})); - // Mock completion model EXPECT_CALL(*mock_provider, AddCompletionRequest(::testing::_, ::testing::_, ::testing::_, ::testing::_)) .Times(1); EXPECT_CALL(*mock_provider, CollectCompletions(::testing::_)) - .WillOnce(::testing::Return(std::vector{expected_complete_response})); + .WillOnce(::testing::Return(std::vector{response_index_1})); auto con = Config::GetConnection(); const auto results = 
con.Query( "SELECT llm_last(" "{'model_name': 'gpt-4o'}, " - "{'prompt': 'Select the worst audio candidate. Return ID 0.', " + "{'prompt': 'Select the worst audio candidate', " "'context_columns': [" "{'data': audio_url, " "'type': 'audio', " "'transcription_model': 'gpt-4o-transcribe'}" - "]}) AS result FROM VALUES ('https://example.com/audio.mp3') AS tbl(audio_url);"); + "]}) AS result FROM VALUES " + "('https://example.com/audio1.mp3'), " + "('https://example.com/audio2.mp3') AS tbl(audio_url);"); ASSERT_FALSE(results->HasError()) << "Query failed: " << results->GetError(); ASSERT_EQ(results->RowCount(), 1); } // Test audio transcription error handling for Ollama -TEST_F(LLMLastTest, LLMLastAudioTranscriptionOllamaError) { +TEST_F(LLMLastTest, AudioTranscriptionOllamaError) { auto con = Config::GetConnection(); - // Mock transcription model to throw error (simulating Ollama behavior) EXPECT_CALL(*mock_provider, AddTranscriptionRequest(::testing::_)) .WillOnce(::testing::Throw(std::runtime_error("Audio transcription is not currently supported by Ollama."))); - // Test with Ollama which doesn't support transcription const auto results = con.Query( "SELECT llm_last(" - "{'model_name': 'llama3'}, " - "{'prompt': 'Select the worst audio. 
Return ID 0.', " + "{'model_name': 'gemma3:4b'}, " + "{'prompt': 'Select the worst audio', " "'context_columns': [" "{'data': audio_url, " "'type': 'audio', " - "'transcription_model': 'llama3'}" - "]}) AS result FROM VALUES ('https://example.com/audio.mp3') AS tbl(audio_url);"); + "'transcription_model': 'gemma3:4b'}" + "]}) AS result FROM VALUES " + "('https://example.com/audio1.mp3'), " + "('https://example.com/audio2.mp3') AS tbl(audio_url);"); - // Should fail because Ollama doesn't support transcription ASSERT_TRUE(results->HasError()); } diff --git a/test/unit/functions/aggregate/llm_reduce.cpp b/test/unit/functions/aggregate/llm_reduce.cpp index b730c3f4..50e27a4c 100644 --- a/test/unit/functions/aggregate/llm_reduce.cpp +++ b/test/unit/functions/aggregate/llm_reduce.cpp @@ -5,7 +5,7 @@ namespace flock { class LLMReduceTest : public LLMAggregateTestBase { protected: - static constexpr const char* EXPECTED_RESPONSE = "A comprehensive summary of running shoes, wireless headphones, and smart watches, featuring advanced technology and user-friendly designs for active lifestyles."; + static constexpr const char* EXPECTED_RESPONSE = "A comprehensive summary of products."; std::string GetExpectedResponse() const override { return EXPECTED_RESPONSE; @@ -39,8 +39,8 @@ class LLMReduceTest : public LLMAggregateTestBase { } }; -// Test llm_reduce with SQL queries without GROUP BY - new API -TEST_F(LLMReduceTest, LLMReduceWithoutGroupBy) { +// Test single tuple: LLM is still called for reduce (to summarize) +TEST_F(LLMReduceTest, SingleTupleWithLLMCall) { EXPECT_CALL(*mock_provider, AddCompletionRequest(::testing::_, ::testing::_, ::testing::_, ::testing::_)) .Times(1); EXPECT_CALL(*mock_provider, CollectCompletions(::testing::_)) @@ -49,124 +49,117 @@ TEST_F(LLMReduceTest, LLMReduceWithoutGroupBy) { auto con = Config::GetConnection(); const auto results = con.Query( - "SELECT " + GetFunctionName() + "(" - "{'model_name': 'gpt-4o'}, " - "{'prompt': 'Summarize the 
following product descriptions', 'context_columns': [{'data': description}]}" - ") AS product_summary FROM VALUES " - "('High-performance running shoes with advanced cushioning'), " - "('Wireless noise-cancelling headphones for immersive audio'), " - "('Smart fitness tracker with heart rate monitoring') AS products(description);"); + "SELECT llm_reduce(" + "{'model_name': 'gpt-4o'}, " + "{'prompt': 'Summarize the following product descriptions', 'context_columns': [{'data': description}]}" + ") AS product_summary FROM VALUES " + "('High-performance running shoes with advanced cushioning') AS products(description);"); + ASSERT_FALSE(results->HasError()) << "Query failed: " << results->GetError(); ASSERT_EQ(results->RowCount(), 1); ASSERT_EQ(results->GetValue(0, 0).GetValue(), GetExpectedResponse()); } -// Test llm_reduce with SQL queries with GROUP BY - new API -TEST_F(LLMReduceTest, LLMReduceWithGroupBy) { +// Test multiple tuples without GROUP BY: LLM is called once +TEST_F(LLMReduceTest, MultipleTuplesWithoutGroupBy) { EXPECT_CALL(*mock_provider, AddCompletionRequest(::testing::_, ::testing::_, ::testing::_, ::testing::_)) - .Times(3); + .Times(1); EXPECT_CALL(*mock_provider, CollectCompletions(::testing::_)) - .Times(3) - .WillRepeatedly(::testing::Return(std::vector{GetExpectedJsonResponse()})); + .WillOnce(::testing::Return(std::vector{GetExpectedJsonResponse()})); auto con = Config::GetConnection(); const auto results = con.Query( - "SELECT category, " + GetFunctionName() + "(" - "{'model_name': 'gpt-4o'}, " - "{'prompt': 'Summarize the following product descriptions', 'context_columns': [{'data': description}]}" - ") AS description_summary FROM VALUES " - "('electronics', 'High-performance running shoes with advanced cushioning'), " - "('audio', 'Wireless noise-cancelling headphones for immersive audio'), " - "('fitness', 'Smart fitness tracker with heart rate monitoring') " - "AS products(category, description) GROUP BY category;"); + "SELECT llm_reduce(" + 
"{'model_name': 'gpt-4o'}, " + "{'prompt': 'Summarize the following product descriptions', 'context_columns': [{'data': description}]}" + ") AS product_summary FROM VALUES " + "('High-performance running shoes with advanced cushioning'), " + "('Wireless noise-cancelling headphones for immersive audio'), " + "('Smart fitness tracker with heart rate monitoring') AS products(description);"); - ASSERT_EQ(results->RowCount(), 3); - ASSERT_EQ(results->GetValue(1, 0).GetValue(), GetExpectedResponse()); - ASSERT_EQ(results->GetValue(1, 1).GetValue(), GetExpectedResponse()); - ASSERT_EQ(results->GetValue(1, 2).GetValue(), GetExpectedResponse()); + ASSERT_FALSE(results->HasError()) << "Query failed: " << results->GetError(); + ASSERT_EQ(results->RowCount(), 1); + ASSERT_EQ(results->GetValue(0, 0).GetValue(), GetExpectedResponse()); } -// Test argument validation -TEST_F(LLMReduceTest, ValidateArguments) { - TestValidateArguments(); -} +// Test GROUP BY with multiple tuples per group: LLM is called for each group +TEST_F(LLMReduceTest, GroupByWithMultipleTuplesPerGroup) { + EXPECT_CALL(*mock_provider, AddCompletionRequest(::testing::_, ::testing::_, ::testing::_, ::testing::_)) + .Times(2); + EXPECT_CALL(*mock_provider, CollectCompletions(::testing::_)) + .Times(2) + .WillRepeatedly(::testing::Return(std::vector{GetExpectedJsonResponse()})); -// Test operation with invalid arguments -TEST_F(LLMReduceTest, Operation_InvalidArguments_ThrowsException) { - TestOperationInvalidArguments(); -} + auto con = Config::GetConnection(); -// Test operation with multiple input scenarios - new API -TEST_F(LLMReduceTest, Operation_MultipleInputs_ProcessesCorrectly) { - const nlohmann::json expected_response = GetExpectedJsonResponse(); + const auto results = con.Query( + "SELECT category, llm_reduce(" + "{'model_name': 'gpt-4o'}, " + "{'prompt': 'Summarize the following product descriptions', 'context_columns': [{'data': description}]}" + ") AS description_summary FROM VALUES " + 
"('footwear', 'Running shoes with cushioning'), " + "('footwear', 'Business shoes for professionals'), " + "('electronics', 'Wireless headphones'), " + "('electronics', 'Smart fitness tracker') " + "AS products(category, description) GROUP BY category;"); + ASSERT_FALSE(results->HasError()) << "Query failed: " << results->GetError(); + ASSERT_EQ(results->RowCount(), 2); + ASSERT_EQ(results->GetValue(1, 0).GetValue(), GetExpectedResponse()); + ASSERT_EQ(results->GetValue(1, 1).GetValue(), GetExpectedResponse()); +} + +// Test GROUP BY with single tuple per group: LLM is still called (reduce always calls LLM) +TEST_F(LLMReduceTest, GroupByWithSingleTuplePerGroup) { EXPECT_CALL(*mock_provider, AddCompletionRequest(::testing::_, ::testing::_, ::testing::_, ::testing::_)) .Times(3); EXPECT_CALL(*mock_provider, CollectCompletions(::testing::_)) .Times(3) - .WillRepeatedly(::testing::Return(std::vector{expected_response})); + .WillRepeatedly(::testing::Return(std::vector{GetExpectedJsonResponse()})); auto con = Config::GetConnection(); const auto results = con.Query( - "SELECT name, " + GetFunctionName() + "(" - "{'model_name': 'gpt-4o'}, " - "{'prompt': 'Summarize the following product information', 'context_columns': [{'data': name}, {'data': description}]}" - ") AS comprehensive_summary FROM VALUES " - "('Running Shoes', 'High-performance running shoes with advanced cushioning'), " - "('Headphones', 'Wireless noise-cancelling headphones for immersive audio'), " - "('Fitness Tracker', 'Smart fitness tracker with heart rate monitoring') " - "AS products(name, description) GROUP BY name;"); + "SELECT category, llm_reduce(" + "{'model_name': 'gpt-4o'}, " + "{'prompt': 'Summarize the following product descriptions', 'context_columns': [{'data': description}]}" + ") AS description_summary FROM VALUES " + "('electronics', 'Running shoes with advanced cushioning'), " + "('audio', 'Wireless noise-cancelling headphones'), " + "('fitness', 'Smart fitness tracker with heart rate 
monitoring') " + "AS products(category, description) GROUP BY category;"); + ASSERT_FALSE(results->HasError()) << "Query failed: " << results->GetError(); ASSERT_EQ(results->RowCount(), 3); ASSERT_EQ(results->GetValue(1, 0).GetValue(), GetExpectedResponse()); ASSERT_EQ(results->GetValue(1, 1).GetValue(), GetExpectedResponse()); ASSERT_EQ(results->GetValue(1, 2).GetValue(), GetExpectedResponse()); } -// Test large input set processing - new API -TEST_F(LLMReduceTest, Operation_LargeInputSet_ProcessesCorrectly) { - constexpr size_t input_count = 100; - const nlohmann::json expected_response = PrepareExpectedResponseForLargeInput(input_count); - - EXPECT_CALL(*mock_provider, AddCompletionRequest(::testing::_, ::testing::_, ::testing::_, ::testing::_)) - .Times(100); - EXPECT_CALL(*mock_provider, CollectCompletions(::testing::_)) - .Times(100) - .WillRepeatedly(::testing::Return(std::vector{expected_response})); - - auto con = Config::GetConnection(); +// Test argument validation +TEST_F(LLMReduceTest, ValidateArguments) { + TestValidateArguments(); +} - const auto results = con.Query( - "SELECT id, " + GetFunctionName() + "(" - "{'model_name': 'gpt-4o'}, " - "{'prompt': 'Summarize all product descriptions', 'context_columns': [{'data': 'Product description ' || id::TEXT}]}" - ") AS large_summary FROM range(" + - std::to_string(input_count) + ") AS t(id) GROUP BY id;"); - - ASSERT_EQ(results->RowCount(), 100); - for (size_t i = 0; i < input_count; i++) { - ASSERT_EQ(results->GetValue(1, i).GetValue(), FormatExpectedResult(expected_response)); - } +// Test operation with invalid arguments +TEST_F(LLMReduceTest, InvalidArguments) { + TestOperationInvalidArguments(); } -// Test llm_reduce with audio transcription -TEST_F(LLMReduceTest, LLMReduceWithAudioTranscription) { - const nlohmann::json expected_transcription = "{\"text\": \"This is a transcribed audio summary\"}"; - const nlohmann::json expected_complete_response = GetExpectedJsonResponse(); +// Test with audio 
transcription +TEST_F(LLMReduceTest, AudioTranscription) { + const nlohmann::json expected_transcription = nlohmann::json::parse(R"({"text": "This is a transcribed audio summary"})"); - // Mock transcription model EXPECT_CALL(*mock_provider, AddTranscriptionRequest(::testing::_)) .Times(1); EXPECT_CALL(*mock_provider, CollectTranscriptions("multipart/form-data")) .WillOnce(::testing::Return(std::vector{expected_transcription})); - // Mock completion model EXPECT_CALL(*mock_provider, AddCompletionRequest(::testing::_, ::testing::_, ::testing::_, ::testing::_)) .Times(1); EXPECT_CALL(*mock_provider, CollectCompletions(::testing::_)) - .WillOnce(::testing::Return(std::vector{expected_complete_response})); + .WillOnce(::testing::Return(std::vector{GetExpectedJsonResponse()})); auto con = Config::GetConnection(); const auto results = con.Query( @@ -183,10 +176,9 @@ TEST_F(LLMReduceTest, LLMReduceWithAudioTranscription) { ASSERT_EQ(results->RowCount(), 1); } -// Test llm_reduce with audio and text columns -TEST_F(LLMReduceTest, LLMReduceWithAudioAndText) { - const nlohmann::json expected_transcription = "{\"text\": \"Product audio review\"}"; - const nlohmann::json expected_complete_response = GetExpectedJsonResponse(); +// Test with audio and text columns +TEST_F(LLMReduceTest, AudioAndTextColumns) { + const nlohmann::json expected_transcription = nlohmann::json::parse(R"({"text": "Product audio review"})"); EXPECT_CALL(*mock_provider, AddTranscriptionRequest(::testing::_)) .Times(1); @@ -196,7 +188,7 @@ TEST_F(LLMReduceTest, LLMReduceWithAudioAndText) { EXPECT_CALL(*mock_provider, AddCompletionRequest(::testing::_, ::testing::_, ::testing::_, ::testing::_)) .Times(1); EXPECT_CALL(*mock_provider, CollectCompletions(::testing::_)) - .WillOnce(::testing::Return(std::vector{expected_complete_response})); + .WillOnce(::testing::Return(std::vector{GetExpectedJsonResponse()})); auto con = Config::GetConnection(); const auto results = con.Query( @@ -215,24 +207,21 @@ 
TEST_F(LLMReduceTest, LLMReduceWithAudioAndText) { } // Test audio transcription error handling for Ollama -TEST_F(LLMReduceTest, LLMReduceAudioTranscriptionOllamaError) { +TEST_F(LLMReduceTest, AudioTranscriptionOllamaError) { auto con = Config::GetConnection(); - // Mock transcription model to throw error (simulating Ollama behavior) EXPECT_CALL(*mock_provider, AddTranscriptionRequest(::testing::_)) .WillOnce(::testing::Throw(std::runtime_error("Audio transcription is not currently supported by Ollama."))); - // Test with Ollama which doesn't support transcription const auto results = con.Query( "SELECT llm_reduce(" - "{'model_name': 'llama3'}, " + "{'model_name': 'gemma3:4b'}, " "{'prompt': 'Summarize this audio', " "'context_columns': [" "{'data': audio_url, " "'type': 'audio', " - "'transcription_model': 'llama3'}" + "'transcription_model': 'gemma3:4b'}" "]}) AS result FROM VALUES ('https://example.com/audio.mp3') AS tbl(audio_url);"); - // Should fail because Ollama doesn't support transcription ASSERT_TRUE(results->HasError()); } diff --git a/test/unit/functions/aggregate/llm_rerank.cpp b/test/unit/functions/aggregate/llm_rerank.cpp index 0408efc2..7edbd556 100644 --- a/test/unit/functions/aggregate/llm_rerank.cpp +++ b/test/unit/functions/aggregate/llm_rerank.cpp @@ -6,18 +6,15 @@ namespace flock { class LLMRerankTest : public LLMAggregateTestBase { protected: - // The LLM response (for mocking) - returns ranking indices - static constexpr const char* LLM_RESPONSE_WITHOUT_GROUP_BY = R"({"items":[0, 1, 2]})"; - static constexpr const char* LLM_RESPONSE_WITH_GROUP_BY = R"({"items":[0]})"; - // The expected function output (reranked data as JSON array) - static constexpr const char* EXPECTED_RESPONSE = R"([{"product_description":"High-performance running shoes with advanced cushioning"},{"product_description":"Professional business shoes"},{"product_description":"Casual sneakers for everyday wear"}])"; + static constexpr const char* LLM_RESPONSE = 
R"({"items":[0, 1, 2]})"; + static constexpr const char* EXPECTED_RESPONSE_SINGLE = R"([{"data":["High-performance running shoes with advanced cushioning"]}])"; std::string GetExpectedResponse() const override { - return EXPECTED_RESPONSE; + return EXPECTED_RESPONSE_SINGLE; } nlohmann::json GetExpectedJsonResponse() const override { - return nlohmann::json::parse(LLM_RESPONSE_WITHOUT_GROUP_BY); + return nlohmann::json::parse(LLM_RESPONSE); } std::string GetFunctionName() const override { @@ -45,8 +42,29 @@ class LLMRerankTest : public LLMAggregateTestBase { } }; -// Test llm_rerank with SQL queries without GROUP BY - new API -TEST_F(LLMRerankTest, LLMRerankWithoutGroupBy) { +// Test 1-tuple case: no LLM call needed, returns the single tuple directly +TEST_F(LLMRerankTest, SingleTupleNoLLMCall) { + auto con = Config::GetConnection(); + + const auto results = con.Query( + "SELECT llm_rerank(" + "{'model_name': 'gpt-4o'}, " + "{'prompt': 'Rank these products', 'context_columns': [{'data': description}]}" + ") AS reranked_products FROM VALUES " + "('High-performance running shoes with advanced cushioning') AS products(description);"); + + ASSERT_FALSE(results->HasError()) << "Query failed: " << results->GetError(); + ASSERT_EQ(results->RowCount(), 1); + + nlohmann::json parsed = nlohmann::json::parse(results->GetValue(0, 0).GetValue()); + EXPECT_EQ(parsed.size(), 1); + EXPECT_TRUE(parsed[0].contains("data")); + EXPECT_EQ(parsed[0]["data"].size(), 1); + EXPECT_EQ(parsed[0]["data"][0], "High-performance running shoes with advanced cushioning"); +} + +// Test multiple tuples without GROUP BY: LLM is called once +TEST_F(LLMRerankTest, MultipleTuplesWithoutGroupBy) { EXPECT_CALL(*mock_provider, AddCompletionRequest(::testing::_, ::testing::_, ::testing::_, ::testing::_)) .Times(1); EXPECT_CALL(*mock_provider, CollectCompletions(::testing::_)) @@ -55,14 +73,15 @@ TEST_F(LLMRerankTest, LLMRerankWithoutGroupBy) { auto con = Config::GetConnection(); const auto results = 
con.Query( - "SELECT " + GetFunctionName() + "(" - "{'model_name': 'gpt-4o'}, " - "{'prompt': 'Rank these products by their relevance and quality based on descriptions', 'context_columns': [{'data': description}]}" - ") AS reranked_products FROM VALUES " - "('High-performance running shoes with advanced cushioning'), " - "('Professional business shoes'), " - "('Casual sneakers for everyday wear') AS products(description);"); + "SELECT llm_rerank(" + "{'model_name': 'gpt-4o'}, " + "{'prompt': 'Rank these products by relevance', 'context_columns': [{'data': description}]}" + ") AS reranked_products FROM VALUES " + "('High-performance running shoes with advanced cushioning'), " + "('Professional business shoes'), " + "('Casual sneakers for everyday wear') AS products(description);"); + ASSERT_FALSE(results->HasError()) << "Query failed: " << results->GetError(); ASSERT_EQ(results->RowCount(), 1); EXPECT_NO_THROW({ nlohmann::json parsed = nlohmann::json::parse(results->GetValue(0, 0).GetValue()); @@ -72,131 +91,90 @@ TEST_F(LLMRerankTest, LLMRerankWithoutGroupBy) { }); } -// Test llm_rerank with SQL queries with GROUP BY - new API -TEST_F(LLMRerankTest, LLMRerankWithGroupBy) { +// Test GROUP BY with multiple tuples per group: LLM is called for each group +TEST_F(LLMRerankTest, GroupByWithMultipleTuplesPerGroup) { + nlohmann::json response_2_items = nlohmann::json{{"items", {1, 0}}}; + EXPECT_CALL(*mock_provider, AddCompletionRequest(::testing::_, ::testing::_, ::testing::_, ::testing::_)) - .Times(3); + .Times(2); EXPECT_CALL(*mock_provider, CollectCompletions(::testing::_)) - .Times(3) - .WillRepeatedly(::testing::Return(std::vector{nlohmann::json::parse(LLM_RESPONSE_WITH_GROUP_BY)})); + .Times(2) + .WillRepeatedly(::testing::Return(std::vector{response_2_items})); auto con = Config::GetConnection(); const auto results = con.Query( - "SELECT category, " + GetFunctionName() + "(" - "{'model_name': 'gpt-4o'}, " - "{'prompt': 'Rank these products by their relevance and 
quality based on descriptions', 'context_columns': [{'data': description}]}" - ") AS reranked_products FROM VALUES " - "('electronics', 'High-performance running shoes with advanced cushioning'), " - "('audio', 'Professional business shoes'), " - "('fitness', 'Casual sneakers for everyday wear') " - "AS products(category, description) GROUP BY category;"); + "SELECT category, llm_rerank(" + "{'model_name': 'gpt-4o'}, " + "{'prompt': 'Rank these products by relevance', 'context_columns': [{'data': description}]}" + ") AS reranked_products FROM VALUES " + "('footwear', 'Running shoes with cushioning'), " + "('footwear', 'Business shoes for professionals'), " + "('electronics', 'Wireless headphones'), " + "('electronics', 'Smart fitness tracker') " + "AS products(category, description) GROUP BY category;"); - ASSERT_EQ(results->RowCount(), 3); + ASSERT_FALSE(results->HasError()) << "Query failed: " << results->GetError(); + ASSERT_EQ(results->RowCount(), 2); for (idx_t i = 0; i < results->RowCount(); i++) { EXPECT_NO_THROW({ nlohmann::json parsed = nlohmann::json::parse(results->GetValue(1, i).GetValue()); - EXPECT_EQ(parsed.size(), 1); EXPECT_TRUE(parsed[0].contains("data")); - EXPECT_EQ(parsed[0]["data"].size(), 1); + EXPECT_EQ(parsed[0]["data"].size(), 2); }); } } -// Test argument validation -TEST_F(LLMRerankTest, ValidateArguments) { - TestValidateArguments(); -} - -// Test operation with invalid arguments -TEST_F(LLMRerankTest, Operation_InvalidArguments_ThrowsException) { - TestOperationInvalidArguments(); -} - -// Test operation with multiple input scenarios - new API -TEST_F(LLMRerankTest, Operation_MultipleInputs_ProcessesCorrectly) { - const nlohmann::json expected_response = nlohmann::json::parse(LLM_RESPONSE_WITH_GROUP_BY); - - EXPECT_CALL(*mock_provider, AddCompletionRequest(::testing::_, ::testing::_, ::testing::_, ::testing::_)) - .Times(3); - EXPECT_CALL(*mock_provider, CollectCompletions(::testing::_)) - .Times(3) - 
.WillRepeatedly(::testing::Return(std::vector{expected_response})); - +// Test GROUP BY with single tuple per group: no LLM calls needed +TEST_F(LLMRerankTest, GroupByWithSingleTuplePerGroup) { auto con = Config::GetConnection(); const auto results = con.Query( - "SELECT category, " + GetFunctionName() + "(" - "{'model_name': 'gpt-4o'}, " - "{'prompt': 'Rank products by relevance to customer preferences', 'context_columns': [{'data': id::TEXT}, {'data': description}]}" - ") AS reranked_products FROM VALUES " - "('electronics', 1, 'High-performance running shoes with advanced cushioning'), " - "('audio', 2, 'Professional business shoes'), " - "('fitness', 3, 'Casual sneakers for everyday wear') " - "AS products(category, id, description) GROUP BY category;"); + "SELECT category, llm_rerank(" + "{'model_name': 'gpt-4o'}, " + "{'prompt': 'Rank these products by relevance', 'context_columns': [{'data': description}]}" + ") AS reranked_products FROM VALUES " + "('footwear', 'Running shoes with cushioning'), " + "('electronics', 'Wireless headphones'), " + "('fitness', 'Smart fitness tracker') " + "AS products(category, description) GROUP BY category;"); + ASSERT_FALSE(results->HasError()) << "Query failed: " << results->GetError(); ASSERT_EQ(results->RowCount(), 3); for (idx_t i = 0; i < results->RowCount(); i++) { EXPECT_NO_THROW({ nlohmann::json parsed = nlohmann::json::parse(results->GetValue(1, i).GetValue()); - EXPECT_EQ(parsed.size(), 2); EXPECT_TRUE(parsed[0].contains("data")); EXPECT_EQ(parsed[0]["data"].size(), 1); }); } } -// Test large input set processing - new API -TEST_F(LLMRerankTest, Operation_LargeInputSet_ProcessesCorrectly) { - constexpr size_t input_count = 100; - const nlohmann::json expected_response = nlohmann::json::parse(LLM_RESPONSE_WITH_GROUP_BY); - - EXPECT_CALL(*mock_provider, AddCompletionRequest(::testing::_, ::testing::_, ::testing::_, ::testing::_)) - .Times(100); - EXPECT_CALL(*mock_provider, CollectCompletions(::testing::_)) - 
.Times(100) - .WillRepeatedly(::testing::Return(std::vector{expected_response})); - - auto con = Config::GetConnection(); - - const auto results = con.Query( - "SELECT id, " + GetFunctionName() + "(" - "{'model_name': 'gpt-4o'}, " - "{'prompt': 'Rerank products by relevance and importance', 'context_columns': [{'data': id::TEXT}, {'data': 'Product description ' || id::TEXT}]}" - ") AS reranked_products FROM range(" + - std::to_string(input_count) + ") AS t(id) GROUP BY id;"); - - ASSERT_EQ(results->RowCount(), 100); - for (idx_t i = 0; i < results->RowCount(); i++) { - EXPECT_NO_THROW({ - nlohmann::json parsed = nlohmann::json::parse(results->GetValue(1, i).GetValue()); - EXPECT_EQ(parsed.size(), 2); - EXPECT_TRUE(parsed[0].contains("data")); - EXPECT_EQ(parsed[0]["data"].size(), 1); - }); - } +// Test argument validation +TEST_F(LLMRerankTest, ValidateArguments) { + TestValidateArguments(); +} - ::testing::Mock::AllowLeak(mock_provider.get()); +// Test operation with invalid arguments +TEST_F(LLMRerankTest, InvalidArguments) { + TestOperationInvalidArguments(); } -// Test llm_rerank with audio transcription -TEST_F(LLMRerankTest, LLMRerankWithAudioTranscription) { - const nlohmann::json expected_transcription1 = "{\"text\": \"First audio candidate\"}"; - const nlohmann::json expected_transcription2 = "{\"text\": \"Second audio candidate\"}"; - const nlohmann::json expected_complete_response = GetExpectedJsonResponse(); +// Test with audio transcription +TEST_F(LLMRerankTest, AudioTranscription) { + const nlohmann::json expected_transcription1 = nlohmann::json::parse(R"({"text": "First audio candidate"})"); + const nlohmann::json expected_transcription2 = nlohmann::json::parse(R"({"text": "Second audio candidate"})"); + nlohmann::json response_2_items = nlohmann::json{{"items", {1, 0}}}; - // Mock transcription model (called for each audio file) EXPECT_CALL(*mock_provider, AddTranscriptionRequest(::testing::_)) - .Times(2); + .Times(1); EXPECT_CALL(*mock_provider, 
CollectTranscriptions("multipart/form-data")) - .WillOnce(::testing::Return(std::vector{expected_transcription1})) - .WillOnce(::testing::Return(std::vector{expected_transcription2})); + .WillOnce(::testing::Return(std::vector{expected_transcription1, expected_transcription2})); - // Mock completion model EXPECT_CALL(*mock_provider, AddCompletionRequest(::testing::_, ::testing::_, ::testing::_, ::testing::_)) .Times(1); EXPECT_CALL(*mock_provider, CollectCompletions(::testing::_)) - .WillOnce(::testing::Return(std::vector{expected_complete_response})); + .WillOnce(::testing::Return(std::vector{response_2_items})); auto con = Config::GetConnection(); const auto results = con.Query( @@ -204,37 +182,35 @@ TEST_F(LLMRerankTest, LLMRerankWithAudioTranscription) { "{'model_name': 'gpt-4o'}, " "{'prompt': 'Rank these audio candidates from best to worst', " "'context_columns': [" - "{'data': 'https://example.com/audio1.mp3', " - "'type': 'audio', " - "'transcription_model': 'gpt-4o-transcribe'}, " - "{'data': 'https://example.com/audio2.mp3', " + "{'data': audio_url, " "'type': 'audio', " "'transcription_model': 'gpt-4o-transcribe'}" - "]}) AS result FROM VALUES (1) AS tbl(id);"); + "]}) AS result FROM VALUES " + "('https://example.com/audio1.mp3'), " + "('https://example.com/audio2.mp3') AS tbl(audio_url);"); ASSERT_FALSE(results->HasError()) << "Query failed: " << results->GetError(); ASSERT_EQ(results->RowCount(), 1); } // Test audio transcription error handling for Ollama -TEST_F(LLMRerankTest, LLMRerankAudioTranscriptionOllamaError) { +TEST_F(LLMRerankTest, AudioTranscriptionOllamaError) { auto con = Config::GetConnection(); - // Mock transcription model to throw error (simulating Ollama behavior) EXPECT_CALL(*mock_provider, AddTranscriptionRequest(::testing::_)) .WillOnce(::testing::Throw(std::runtime_error("Audio transcription is not currently supported by Ollama."))); - // Test with Ollama which doesn't support transcription const auto results = con.Query( "SELECT 
llm_rerank(" - "{'model_name': 'llama3'}, " + "{'model_name': 'gemma3:4b'}, " "{'prompt': 'Rank these audio files', " "'context_columns': [" "{'data': audio_url, " "'type': 'audio', " - "'transcription_model': 'llama3'}" - "]}) AS result FROM VALUES ('https://example.com/audio.mp3') AS tbl(audio_url);"); + "'transcription_model': 'gemma3:4b'}" + "]}) AS result FROM VALUES " + "('https://example.com/audio1.mp3'), " + "('https://example.com/audio2.mp3') AS tbl(audio_url);"); - // Should fail because Ollama doesn't support transcription ASSERT_TRUE(results->HasError()); } From 9a4fe206192386263a7e54d6be310147224a266b Mon Sep 17 00:00:00 2001 From: Anas Dorbani Date: Wed, 10 Dec 2025 21:44:51 -0500 Subject: [PATCH 46/59] Updated unit tests for scalar LLM functions --- test/unit/functions/scalar/llm_complete.cpp | 4 ++-- test/unit/functions/scalar/llm_filter.cpp | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/test/unit/functions/scalar/llm_complete.cpp b/test/unit/functions/scalar/llm_complete.cpp index ccc6f93b..74ed613b 100644 --- a/test/unit/functions/scalar/llm_complete.cpp +++ b/test/unit/functions/scalar/llm_complete.cpp @@ -234,12 +234,12 @@ TEST_F(LLMCompleteTest, LLMCompleteAudioTranscriptionError) { // Test with Ollama which doesn't support transcription const auto results = con.Query( "SELECT llm_complete(" - "{'model_name': 'llama3'}, " + "{'model_name': 'gemma3:4b'}, " "{'prompt': 'Summarize this audio', " "'context_columns': [" "{'data': audio_url, " "'type': 'audio', " - "'transcription_model': 'llama3'}" + "'transcription_model': 'gemma3:4b'}" "]}) AS result FROM VALUES ('https://example.com/audio.mp3') AS tbl(audio_url);"); // Should fail because Ollama doesn't support transcription diff --git a/test/unit/functions/scalar/llm_filter.cpp b/test/unit/functions/scalar/llm_filter.cpp index ed822136..df8a7754 100644 --- a/test/unit/functions/scalar/llm_filter.cpp +++ b/test/unit/functions/scalar/llm_filter.cpp @@ -191,12 +191,12 @@ 
TEST_F(LLMFilterTest, LLMFilterAudioTranscriptionOllamaError) { // Test with Ollama which doesn't support transcription const auto results = con.Query( "SELECT llm_filter(" - "{'model_name': 'llama3'}, " + "{'model_name': 'gemma3:4b'}, " "{'prompt': 'Is the sentiment positive?', " "'context_columns': [" "{'data': audio_url, " "'type': 'audio', " - "'transcription_model': 'llama3'}" + "'transcription_model': 'gemma3:4b'}" "]}) AS result FROM VALUES ('https://example.com/audio.mp3') AS tbl(audio_url);"); // Should fail because Ollama doesn't support transcription From 85431b8a1bf65b9950168a61e85cb2c4c7ad8284 Mon Sep 17 00:00:00 2001 From: Anas Dorbani Date: Wed, 10 Dec 2025 21:44:56 -0500 Subject: [PATCH 47/59] Updated unit tests for model manager and providers --- test/unit/model_manager/model_manager_test.cpp | 2 +- test/unit/model_manager/model_providers_test.cpp | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/test/unit/model_manager/model_manager_test.cpp b/test/unit/model_manager/model_manager_test.cpp index 8d328671..e67cde06 100644 --- a/test/unit/model_manager/model_manager_test.cpp +++ b/test/unit/model_manager/model_manager_test.cpp @@ -99,7 +99,7 @@ TEST_F(ModelManagerTest, ProviderSelection) { }); // Test Ollama provider json ollama_config = { - {"model_name", "llama3"}}; + {"model_name", "gemma3:4b"}}; EXPECT_NO_THROW({ Model ollama_model(ollama_config); EXPECT_EQ(ollama_model.GetModelDetails().provider_name, "ollama"); diff --git a/test/unit/model_manager/model_providers_test.cpp b/test/unit/model_manager/model_providers_test.cpp index cc55eadb..1baab5e2 100644 --- a/test/unit/model_manager/model_providers_test.cpp +++ b/test/unit/model_manager/model_providers_test.cpp @@ -101,7 +101,7 @@ TEST(ModelProvidersTest, AzureProviderTest) { TEST(ModelProvidersTest, OllamaProviderTest) { ModelDetails model_details; model_details.model_name = "test_model"; - model_details.model = "llama3"; + model_details.model = "gemma3:4b"; 
model_details.provider_name = "ollama"; model_details.model_parameters = {{"temperature", 0.7}}; model_details.secret = {{"api_url", "http://localhost:11434"}}; @@ -158,7 +158,7 @@ TEST(ModelProvidersTest, OllamaProviderTest) { TEST(ModelProvidersTest, OllamaProviderTranscriptionError) { ModelDetails model_details; model_details.model_name = "test_model"; - model_details.model = "llama3"; + model_details.model = "gemma3:4b"; model_details.provider_name = "ollama"; model_details.model_parameters = {{"temperature", 0.7}}; model_details.secret = {{"api_url", "http://localhost:11434"}}; From ba54ffa6d72e8231a61b6564fff49e1ebc4f88c6 Mon Sep 17 00:00:00 2001 From: Anas Dorbani Date: Wed, 10 Dec 2025 21:45:04 -0500 Subject: [PATCH 48/59] Updated unit test database with latest test data --- test/unit/unit_test.db | Bin 4206592 -> 4730880 bytes 1 file changed, 0 insertions(+), 0 deletions(-) diff --git a/test/unit/unit_test.db b/test/unit/unit_test.db index e3194bcb637c89601e639a3bc97183ba099575a4..f850e71069325d03175c6f1d3b50fff0480df020 100644 GIT binary patch delta 4205 zcmdT{du$ZP8Q*X2;XOa^uzh}=@%aE_gN-NH2H)XeXp@kT%9uX_DN1Xf@$%{!8{4QT z2>FnzffNOuwJJlChF2mvg_eXKWm}QNN202tM7517krbu4N>qtR2&ZWfs3q5LcJF+9 zM~M3Rrz_3v>~FsL=6n3+o4N3Gn1rWKkP}YF^vCz7e8bk2tN_vLZzw9^WEmsnwS@f=iuh z@i|?JbT=m+-ztH1n*<+ZMKAtl5NvKmx}P6?et8p+bH(Z#tuKATUu1N5{4| z^G(wgsr0|rKv6@ny+0HR_661Y)APVsCdaf4A83VkOVR)wu_x>=PEW2nSUZaEAa3UN1t6OT*m53Do%++ir>gNMPNZyKEw zkMp15j1i-BSyS`S?!T0YnF&2%1g@2509Pt9fN_stza&tjW~6Hr`y@@{YjX{t!dL~@ zI9g>E!7A<1ZiZllXyH)^JNU-qa211)tA~(xjDlDD=q=bmMCvy=c*G4=P=-NSVtHsv z_?|N`f;yapcUD3qIyp7hrmY!=Pj#K@5-PYO0xtf+ zDCBG3ybS-aA=wTJV!WBEHPc{7?u|e;|K1|V)neD6%0i_1sw=dGcbCD8(AsTJRua<| z$4s`i;-PauHirU%zh(l{c#@>zi4s?{c>^KkV)g|*$kN8fax;LJ^D+Q6UpxB=)Y*CC zZFnX{Dl30H1RKn>K~nLbgkZgyXhr_nA}HQ$D$jc(vlkNW{)> z?<8|Zy>iM(E$$=l+lAY|kf8=InX?gz89m)*{YN(td#0cnBB=-++ec0nepF}EYurE-gKc+Dbfs3*T`%{EpJ4X67G#~3 z^_9zscWh55k+A~F7uJ#0fU@(jSc_Yopwnz+;^WVeiiI_b^mf`ggup9!4yA~Kz@bVF 
zzC&*7SA;zd{Tfl{OxzxlZ&%`*;JYBvx8mE%Km0ve!|NwVDeuG|$mjdINJ(*?Nye7! ztHGHf+n0{d3+_h$?60wlBLA@r^>`$`OwxVS(8syivrdkBZ4Y%3FNep|$|VBvOyE{& z%Eco&hKorzlJBIe!bP+BhDZFl36jfeyKsm1c9HpILP|UqY&m)lgMn`=KiNgR@LS!( z1Kk0S)*YzqT*RoAQ5&OnMza`|8Fet4&1epzxs2v9n$KtfqfSO$jLu=SkkKMWiy3t@ zTEb{4qh*YiGg`r@htWz#=Q3KwsF%@dMtzLVV{|^F3mC0obRna)jMg!Vi!Nrgp3w$I zmoVDMC}s2!Mwc?$v^I7>Ebs9wo)CWri?NA!^1oP5;`^&`JC4o4Cf>$Br~_+h5w>D% zZfz|sxe#Zxl~F&V0Y+EI&wPIA%N5(#)^nK_J-Mj^r)|O}-pNbe(DvsQU!ZAey|1so zR|$0u1P6nC%CIsxH)@}j9 zKWl9+^72<6+Ir$2f#cx_-WYP4Lzjciz2oDz z-ZuI0+vP^l@E88p2HkDGw+_XAwc|U9X(#vUPH_4+hCJnEDdW%4e15lK+0Ey7LeTzA z=Nbq)FJ3qdRwQNo*z0`j=1qWaR@kSvJO;h?ZSfO$vd+m&cX%CdVrH_LQA|!IC3!s4F_L6;v(?F>vO`=79Hh zAoJU2;faX7>ijOKw9ju`!_TMA-){GBt{(ekJ6J1HhCpUUU^S+UNgtCtYOPKgCs#4S zC#_h@I2>k<`4ohmlhfxcm2JVVg5CGv3Oo9ImAd>Xm@=JQ_cuIta%?|TGu3sblbqPD z-4|0`>16x1fs>saK_@#M;{y{1)v*rvlWAwq);pQLag^H|A)`j7aMCBg*$APV` z*+&YV*wVXRHFO9XamkfGWx$}JGoTJk!Nq``yZ5u~Yi+x6?8MdKD{`@$^*}4C5 z=;ha*$6<%8S%95oSNGN113!tVLkn=Kl9iYMZU4zd+$VQF4VP5{Hzbl)vu#?kHvf92rVpHzo`@a12|NdGDe&7`9ULH7+k^) zCI1gY3Gi5yvVvmn$Z`S3Jw6l|B z&ySG`5Z(o{;|hF{*S3CKgxTpYc~n~-5>N=RCS|0H0%G9ZQsB*1%L($1*Bk$TguHxu zb;4f(RQ6a~n@Q}7W1?lxsp5O0#RU0m&_kKwSoc{9$I+ovZtszev|LxeBw+ur3) z8F;Nsok<{Hr}`$m&((`Hlk*YC*ng9>1ZWYteq1s6E^1*j{Jy@f_wu?vcsv(2hEE(*;TV?K1?BU$F9+u8ffkJYbYmy^fm}EW~*Px^bS0ksY@%D$IgJ$pfY_pf zg|1OqcvltPDF3>MU4GBC-}QNH15^IzBGyBt+?&Uk^T&9rS2shSUtW0ttFMYwkmPDM z-3+?ABtN1=8dAph0?S!4@Rxw(5_xQ?U$&~mckyZ6?6z>oSB)vIw@1w5F$>yccs<_F z8OZ2ts9m4O)xJne%6PA++8Ow5Q>pm5dz(T2{ztf9ckSJUVNHVV2)&?bNC*uO5p-m&NB zRhWEtS~5mHpWM-zSQ#EU*uHsx`@z(M2aR()0){-?nP>_R4}5K4lZ$BdoEOzgouSW| zx^Ce}CZ6Eg&GUAZiQh5u0oPAP&=x^)LAMCn>aVSPZeaWlJdiH+_vsRUuI6AE=HBbh zv_oV}kFpODDd2LVbK55Sy-cmjcP8SdoZk|vaM?*_w?Zc4iCRF}yAErPWaHa--pTVW Mm5ujo*L2tY7u3VOHUIzs From a171c73739379b3e45d0a60fb14210cbabcab872 Mon Sep 17 00:00:00 2001 From: Anas Dorbani Date: Wed, 10 Dec 2025 21:47:02 -0500 Subject: [PATCH 49/59] Added audio test file for integration tests --- .../src/integration/tests/flock_test_audio.mp3 | Bin 0 
-> 90720 bytes 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 test/integration/src/integration/tests/flock_test_audio.mp3 diff --git a/test/integration/src/integration/tests/flock_test_audio.mp3 b/test/integration/src/integration/tests/flock_test_audio.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..74e8cac2702a8e568e5f73e8cce2bade1f4cda2e GIT binary patch literal 90720 zcmcfJXH-+$*C_m*N(cc$2#_FRfDi*xb|4@iYC@9|G*kgm6G{X`1w_Sqk`Q`8x&cu` z6%dprg6$+UDFKmU!Eyir0n1TDkL?`q@fq*^k7qpZ{d7Ov^<|H}_s8E_d(O4y9Q)_T zxgH=~Y(JqAECb{t1FC|8!U5Kk*hj2ky>3h|MnUQ)^k91kgLb3tW9}nFw4pE0ZSQ*& zg}@{5P$UCI7Yu^e^KAQ6#FiO}z3Y@z&-FcT{~e+&o%)zHTxVNEeh2_Yq^zI|v2XvV zN!3fQLgp?20Dli0sx7;f(!Z_0zRMzm?)~$(KRyR$E&$rxbp{hB0TKL2A4cQV`qJ|w zYP#odeYkPpLXDv*Nka)|kiIFmyXn@X!@VOf;5cn|hdAFJf3|5g%_0PN%3-J-wre0A zT9J^Q7L&XJV|=9UGnBjE!2ZM=yjN>5yOGPv$`>g4tZRT-9g zl+n}NuRnt5ydU(VdbN+gbu(*gGnR-VXp%k#smh@y>Z)Xa`LpH^NOs*?x8h)@DGKZZ zZ>Q?@>)VslYo%i5ReSY$yzMRBxfb>yHa9d?x~sJ?hXtd>F!!tFE?$*H+3oI{BSAvF zLZ<%Q|9(F6L+9yz5C)EPYg(hkH`=2t^5gOMy{kbkWNq1PbTq#9jj65yfTSIwU4hw} zZMj#xKj)4=gKD>Y#lE^Ts~^f3tq=URAw^cy#l+d0Cq-;)<}H%2`!WeW%l@~shj6vs zg^-}~b3gy}bvan(K%FTW1#0oMYLSu)7Hec^;9DMCh2aRaqlH^j{zNFHJt_qzp0&ES z>tXIEyYKw_x^>)eT{4esXJ3Sx*x=)=im0}RK-!c{0~7vU-&Lwz)+O>b0_+*1`evI? 
zmxsLm?Tydo*LQeLq&OSVo*c)Vkh_y67j!?|I1B)IrZOZhqPNEY|NInHGXpsZh3=_) zKO9Q_2r~Wj+LK9vQoEsu6meR9&-igOH^&5kVjjJqZo)DYOlmyZv1R1qA0q!Let?u9@Q$yD7# z**bxSqcyG6T^4rI)%E8F)S}DiY__3NfyOR|VLm{Up&&%fJ)w^Xv|e$rHAV691_DQB z$7DeLg*!^xr27`4<0hE3@BwkxY)gRiM<-Ewkbt2*+i!!W`l!mgn*?rvLdRe;t9ckJ zcOxgeo-A9x`z2X#AOgd`f5GaNf(U~vi?o}1vts!bSt7glL;}hqISR|wGtas94Q z&Rj1e4Ut-)>{&}~?K8nLU5z>d_PVuz(G7h{`NHH@^ z1D#0>a9p;pP(H zzkEGoF%CBDzY@}E9I;hy5U2RRF^-@0j!8!y2#L(Z^2YOa|(DpGe3+w1g8&#A{cO^d5J)`D3aR{o0GAH{!U{zqd z|3Rq&R|I!~wSJx(qb92FF6Kf{+^ zb(KYd@Acub%dTIV8%rxg`Eu$5KY5Bl%^Bd|aEE(&v|uW*J34qjCgpD1*m892QQe%j zhi})MjH!8E1ADzZ>3n%4M* zh)kLY{t-or04+d2C#ec(gRO*)+Je6$XS@7}0r0QM>E4p9{0t#g=fZkCccU=@~i*##2i57#sW&s4FbnXv8Vm@fYkE+2P+FfCmm<)^jAfk&I2v;h=~jP2F6 z?t$`wD))>!EHj*L|2#;kggbPw{Lmm21J>Bq86u`o7A254M4qLLtf|GBk#T3Hhccc; z)$MdtL%zB`;1X8U9XD?gT0d9+eA9iy{HM8nGfyO0_mu8l+a&%mJwI?BpSkM#E$d<0 zNrRHpFF&q4qzqSo=(jce$2s=M^I}(m$fN2}+KUmB;h@T?DYuG4&nOda4(|@0RK9t9 zNm3o%c}mfFH|^4Ind8CYt~cEl-i|`u!jao2uI-4~?Zwo;r8{x-EgA$kShx)d#1Kq{ z!PRVgxVn813BeDN|0rhtri79Iz>Cp*K#svEbVeMfs2f>YkW&CYl_bbNgjP>QIk~xb zF2MO%ZrIM$6P2l9TQvL3yHK1TIHbw^#R&g3eTLkBI6PDzO7@v6UelLO(t+!#>B38T z0)rP&J->WU$5lBn@P85YSg|zPqFH+e+Xyan8l1W_G>F1oOB)V=@bkDf0Z5}{Lax4;ilR7;7Z66Q+7u|BDcKK90JIt#fK z#A1e$%YpvhN^2M@IoC~yIG-zuuCGNam815vdS2mvmDOpo|@bV$IpP z1QFbUh){;`6J~Pw&k$TK7)mZ>Q`2#!e5|{aw%!jf*@%O~icoPqGJc}5jvihMhD?P> zNjJCFWIo!l4}m6XxH9^sXT0^DA93&M?5JISxUSduhw1EJrU^^NzH2f?hGTx3#RHcq z?QRL)$ol4k8)%zDPKa_w1I7ziKN^jdf67z4X5zmx*|76M&X=;`&3|lK7!opPcDMFo zU3f)NWo`e|uRDWCzqnU(LuLE|=b!^>6i&fPl~S0JzO^2hR8f$-BZ6-xCwQ?^z0&VV z8R9hnNtTFbfpCSw{gXm|57UjovGh}t20JZ>&xkvE#ak4b&|KM~kr?cKKV-H`F0bcM zP&{3@VN`5!jb=@5q?w(Yc~>Hrt&&=4l{hBDwYs=i1$1wq8+Op~SX_KbA+h6lK5qTL zbLp3F_Mr#|cj)IQdP7hJ+{}rb00gH)ask3)37?9&i97;i6AamQ?+vM$ zr=}o4fSV2~>Rl>+dl<5B8v(+!BDER-zF$uDz-0_R*H4RpEY{64WXqRK;QGNGOiJhdG@MWo@~%UY#*I#Qv(I?E{uWIXj7a! 
z7)m>($3j6dT4(^LEP%>gWLU_wi+-R1Ukg^y_xvHn%gGrnFFy#AHD;DYcg?5tEU2h4 zz6#tk&-|9q>3%Y)%St&n{UVgJkigS3cRv1k|yB5U@QGDM@~@1YCDu{KX3YO9<;Jt$^Q8~=BCS> z+#+5mR~Enak$>x~=6di2VsRFFz&m-RC4}-bV&E2!2N){eSsZ zaNra+@UFx@YMP+TIhO4CXxyHUK~?+>PShO;w2HTi#L#BX5!kxF;oxu`oHoY~;wn;w zY=?`_=zg3Z`GYsYI^KDlRzgi9oM%cg@jTw0vE zH$HOur1JQ@WdrE7`BRO@POAWx-xe2!eUN{{Ie)Ety|YZ?(*dPL;SO89+%nxz-nElU zl~<1K`O|hTcAq?VHs%PgIV3^HsfES#cD1&kfnfdr%>ejnF$sICH-U>qy9w#IgJnL0 z9snl_P}^JSD*9*l8eOuU7J2H$rdn|i*vsP#SU)4SnXg4GR z0T!$B5kY$Lu@wLXfCxjQo-o=RtZ_>~Ii)p(Vdik1XXsgSRi_^(nq_#@)dfMZA-q<= zrKF1_%v`3%5foEf%j*61Q2*xIjqQ5mq2t5tvuN?K?t0U~=)M9p6}GiXc`1*>)`SZ8 zC@r0m{_?#M*5eRdywDVcxE^6?Jy+x1YV+I2ZGo2lNql=GX z`Xnk9iqcc4@h5|kRN@(bxT*1(ch$?Iyy$*9h1|8bkNk{RJ)&_eopE8m#`j{G!iRbO zR#jb!8Xs}{YJA)t)jxKE&U+K}|I+9_x@e@He0Y!g_lnCqUMI#+6YI0rdP>&(1vH6n z25HAVf1XOd#YlYPrh~KmyCVhG{#F}5@9@m(4?uQ;PM5goSi?0rjrULEitBl1 zLsmyiL`o*Tv)dMrf=|{g>;I5J2d9`>A41mR0xO%T_Rt!0ap;Q*y z5R>R@WfI#B&`?L#VR&e8dUe>9EvVx&at+em0qIwPJZjZINm3ig0h;)PFl=6-n+v3+ z9PH=5)KF!)P_61XSQ70Ce01t#Gt*oA!HP}pA-#maCi4FR*@@kJ|AkP}T;|Y4;m{m| z@BqH8iezF$Oe{y3f$2+tzsk4dMYv`7VT56C{wEdf4rQCG!$sF;{60 z|MLA9f7<~-|7jIkPr=rezPE9Oax?HGM2ykc@@eT}Ot+VRLo>Gm74ZjUDM3)K-MQBi zAHdRXNi_-WuRhdrGiTEqn^{S$Y07rL!r!THkks4Wa}M?Mjov-_=DtTLSbnQrU7C!R}ey)uQwIR zu*G?dcx{)7%Iz+zo7M(jZP{!{?}^l#KsjZS^_0KF2{9}a5MU=Ix**X2 zRQ_Jz83|zs1oT+{S%`8N*|1+La>IIV1WD_sD!I$1K~z%JR0_^HJ`Q=w z$ZAC1P&a?I;I$KR=$Q`j+`_lt|Jqe>Yx~vtH(m6h=e(f5+t*)FhBpo#%xXWK=;fD3 ze|luG`hc=FQ7`4`UXK%{OF-RY7gS>=uYNfX07wo;b;{w(^}E|@d9^B*`K4|bi`T2( zcUIgfFjqCTy}PuvA$v=K5BKrmsHU8!H6O>ezWp8)JNj~K!bNehxKkGTHb<$jE?MQk zg|r=WBj4(s#i}(9V>di+WvC9lyyO+OomT9vNnor4tqm$6Bs=i6p&uH12oLXu=O_m>mrLL-o z&}12@6vr`q^^j))kA{dSQ?i;ELIIR1xU)#_B6!z>;u(WvA(cbrH0gpk30V~)_-N7f zk)q6b)zHhY8fX!9gH>QHNI=sVG>}eE0@CfcR0~cmwj?KE*cNrus`|K%W0v*A68Ic# zQ`^UY2sj2JsfK{xPtsIF`9ccL-BbesBo3ap1dhPD;#e)kCYa)xG8*|&9SQ@LYyRoN z-wUqz{g>}Qr*GT8fm~b!e6AzCBUpaCq*oT5Do6aU&13>Vu7x9>09PaUBT#Z9v{24z zx((qV)>h%b2%zUxjX#F^=IQsRJ=0IEvx 
ztSUJ+mSn94ADihr*zi`uXv}kabXYaP%=+`u8^vPRW39Z&i+7aIJR_i&{Il|=B!Q{N z;=g&dbM6{8Tu5B6ySFN2^W|NJf$I^y0g2daop{`?VB9_bY~vKu^~$q}C52k}ZLi)c z_HRE$86q6lFCIg3M+@KWI{W(j@wB0`j91W&8qW0AJzmYHdq*K_QZ_AB+#LJhsb4xM zo3+b#>x^TN@JIlFgM2ntxq=lcS~|BSaxdS>4TBn;+m}JIAr{ej{Tb?Tv^5|;z90%0 zI9C|(`%ImhoJsB#dy1zcDtV?BYDQ`p{bFrUX&sT4OESBI48p}!88fhPCFipwz=Lc6 z>maEdeWP*dj^1>}wetchCRD~);LS$3P)wcZ6yD1@sEbo71RCv)w88R*`rf&PXTI7y z2tQ!Es(HlTidmooqt6Hp{8W|Q;;}e}6U%P(KEA(-FUsiWEi5EPgwLf$0{iA`@Y7K{ zcv1KR8?suge)&$F@pss>v-B~*w&Rx99nI@Qp{8!6vN0JDe@z`_^9PN|66&saHy^`M z-zSA;SQBEBeBxT^ajcj=$YP%Z_12+-&KkbHIuVJhctgTOu{vM6EKNr&kPJbvvm{TF z{-hwiL>ixgOou@#`b3g_eMkz%Xq`>ZORYWDPA~&2xU}Ni?sMM$I3|AWrOBEq7}$YP_oB0dFkQfV6RI}WP<{r8iRXM23?@#hDB`)dDGd&5&T6*e4doB233oBQJEbNY^=kRz|^ zeE;!(IPKoUv=lSk3D9b)8B*PeA5R2Lx2Y-NQI_-lPDn1N8j>;hX7l9YWGXBKD8r0C znM1w1vgzTovWm&J&y7|;Z|0#WYoB|?DK@Q-(?UpN01FkkgEk0GI3(e8@k_Lb;~;$!|>P zQ`~w2_bpuyXrfD;gA&JW?&w)W!DREpS`f^+BU=87yC$Q1!_ic@>49g5&9CkYR?7c^ zGYGE(eAY7$%al*OtQ{lVa1SFO`NEKx6>m!7-~QIdEA}jurf90ruLo}+avGl; zv1*T-w+^Po6}Y|bDSA+pax}WJE{k<$riT157$az8+m61z6Z!JkzN!Y-Y2j<`n$+FF zXK=hb^q%sPXKSfysQA(ic@975U$kF(-Uujlul6>t8H)VM#MR+`4q@2o$kn!|gI8}X4_*4oS<#-e+!*@C1 z)_5@)0Y(3A7vR4~A#A4OU2Bx$PWPUJv|;(D3;6w9Vs4>vhFr9zx^Hf|5`pNuumi88my#A6!GEKwY;F3VIRBl9{v3v0$a7b zUbSG-EFSvt+MXM-jlP>5tvvWLJhl4%%8h%v1!zmZ)%M)&7c*y)ysNL6#6JsgEZMQ? 
z10rw-{qNl53%YxUBjx_)&l5T{w0?9y+F-ZmV24pncw*?hK2`!8m9(#s)LbYH=}%E|HtiBr`1j`b3<8AV<_T~Mcu&=+ zvgG};9i2P#iaemVaXVVB&HvpkH#Z(H!;tH1NHh6%2k<&vdk7PwdpLa><=6oaR8p7t z2w*IYm2m@7ZGV5NHBHwEAGg(ZDy`q?y6G7cBh!LRf(sh>ijO zsoS9LI_)6$6J2U4G4oYK{n|0IJ-?~CPgFiF)exq1BT#7skYNuB0zj6kb(<$h<*GoDE+%fD zwkt)eTy7k9YfkRNDD_2C`-+G)O>KRW5;v7B9#Od9t+c?p#CuiV7n_QNWw|+j{;gqK z_L)P@q$(0BT^@Z!N>t_+p@=uXo(D=rNIb<#l1*l2V?;wa3RIGe zFODcnXSt`#5uTcu?T*KT9y3_Kd{>W#I@ke!9X;hO>f7nfQVfZJVR8-NET085GO(yz z6xF@ZheKvaKbN9We#g8WpM;x%8Y|5NI?hVW(>H94yb+s5i2f#WrUF+w+Dxs z9xYXCn|(gN>11=vU+>x-2YYW8tz6T2)6#fhXzAVaYdf+xZ-02_!u`JcQD)%@_ESN3 z(yR6IPmrEIx{!PKi``Ij$3|XmHz`RgZOpMS@P@1j_h;T@jam`WLbY{ zlPQa7LE}rI1iZk24{n#3T!bi#ZZM-QWqs0feZ>Q9USX%XiQSuAR-^yl;-72|zLWURG$793anW z;JmNx+wzP$)^Dv`he^<(F~G;tR%@&RO)^bkK%E`1s@fuykA!rP>tm1utJ%4gsL*p# z0SRzSn3dqoaMy!TkF3#!3|nF|G+Ow0pI`Qu$LDOwxou!Bs=nI55&A5Egm#NUXmh$V zEf(wCJ{^)UYIXIsVxo-T<#;exg0q zp4YY?xPPPWv~gD8-7{AkX0CMZckIkIj{W`g^UU@Q%e~)JT8oW~jf&&acz@gz>K)^D z9Ju9dJ=&dEv~BN=lEs*-$$@Ex*Itt{t#uv-Al|xONSq7`96TQ-?6t6k-#pYduTK8wd?VohFc64GpgE+R z;{w{lE6wk6~9(qbH3t89(n*I8AO$BjlsAP=F5$lVPpYdAYcWZ6ua{Q-;9NY_i- zTobFPv^Ie$aCr0MHG!!_sLneA54kT^396JT1A^JP;X9&BlILI2f@=FFKi=HzP0F*z zItK0Gau&($`UGbIht9yNmIyg*|kgyC$!~*JTl_6JKqNG$LFP+`5 zu7ZR?fBDWH@OPkB{6#MWoxB*kw9~Q5=j0=+lbaxCq&5MpK;OEMn2i?gypVw9J+m*# zzRSrzHt#~4EE2CwQ0}M z&yLqUK2yT!@sFWM@lZnhNaW|r28WI2m-i19yNY!e85?)zo6y(0YDSC{=|R6a4yc&& ztoFePcs)b|&??!tfgYo@tz_|tWdI9A@(+P1emV%3odXXd#I(y7p*H7TK(GYR0xA4G zptFESbHAqz{>gMh1N|Hoz@KvfK!sh+d<6MFQxnaz~XvUQ%#ZZc}iUj4t@v)-q4r)jY!-{EwXCp3SN1Q7883BiP(dx#5mVrR7A;EBO zx==sqo`J`0R}LJ08JzewE%1+RpcJB&fssP8`mCuCP>{)yBMs!WzT6^FQ8=<)Nah{|ZlkQTkfZzJC!9&8hnmvIVp~ z)w{N0(9*v`Dge&}xIz>Lan^ncdx|2%XCVFbvvnq~pRjTx->uql3`3GPW|o#6B1 z0cA)ZM48|M>bMJa8zPa^?pyltgz75zD|m#1q7w>k7+5?+ezo|PZ~t8c%27Y;@^u(!`9 zKHD*@X<=83pIx$x+xGjvuVtjL>8{>hiy$^3Zd%UIT|7!fhj%9{A>jvdXZaiT6xZlv z7big#v+Hp>9YrVPCXy)G;tJp#j0WUooj&_Eo^1y$Eb*b3NfyHdBKbK0h?7mhFI(h1 z9?z?c|B-#$&+l^`H|lD8flG%x)s(5hsP;1BN*vrZV3={1HQCx$(?a8mC>5@zSDpLWA&-06~R3GO-(No>UY&of 
zS`xKEZT~VdJt?f7QpMOWpuFe_7nC!)?X{F_HORt-VRqSIL8_8iLhoU)xK)b#XaOaN z#7RlrLeMJ?7uM*f zqV2@6Z8{1qjYW4clf*`jCO3E*U|qj6s~z(r8)F6)^>2?`<$r1KH@g>T)Kb>cmXJX+ zfhFv&hF2U;W#~!+kP%H5ZF>g43D&)0?%NXZpQZhOh|K^0M{sL^U@66epf{o`{K?#x zR{CVf5PGg||A{YCMT65Of76sxs`!$LQX3#3`rup$>?GNu{vaGa?te$weBITLT?2m0 zA$LM|p80+_PtoqyK6ftTPm6jgNc36?eiJ7xNp$?__zC&Uk1iFgnz65zbd@ z2@mBEekQYay`COWJ}-ut8x%SzJCZy(S;Q6I`{-;!D>Toe?p)H-4G5%03IHg%SwSDh zs8xXeUvFo|t^4KM9v0ycT|3g;Q#vcHmI6F%oyf@_`=&E;>3{&T%|l(ER+XG=8nNP; zM>@AmjP54U@;pZg5Xgz5JijuEyPu!THH#sY*Nt_TW=_M}q!n~cGfco?ZM5K)-5^5s zj>$^d{gs+Cd6F)pkbDoMfA3eX$s_R>EXlrS&xx+rLU`UQ5$Ck*c*VqHcK4SdAut1R z3tZook}o;v^pCrLW0AwbR%M^IC27NG>Ea?)yY2XA{^K{QWU+a~!AvH5yft9va+1#X z%NWMUN8$Bg+`~@Ht-Rug(3GLw^)~+^f}dyJS5ho>f$daUSnmTm4r2%lwTL-Q8RGd} zUk759J``Vfa9OaVF-oGeo0FU`Lv%yt!{|K_C>$>bf1gLC!h>zJHm(QPYL`Ra;0Zyw z6zsf4Zl*gih4{4jqDdi95)EAb$ru~<^xo&0p&n?(A+QtbMA-O4FM4(!*!f*4I;EPX zLsz%GY#(9P;bWayz-T2M4lZbZZ-*Q=Q~<>`kL}#iO=0P9IA$Q#F094@Wf7wtYl{>; zsZF%haXv|-}KMqg)yFYr7_(LgA?{=*sg~O-D+?SyGP5sj#x;yu4^$_8gSi9kM zdZ`tp>JM4B@;2I=dUfUb_T3x|$@+w$_azjW%6>oZi*;t4Uz01x>eg2yZ~+~C=MzY> zu`OK`?VQ?)T&+HMtp^3mB$U7U--cQrVPY5?WS*zJR%ZO~+dh93KqNmzYTToN_m ziyq)k9|34i>CTO)COA!muJ4u?{kKKB(U+mnSnWy%XLp5L#SRI~zl!lZK%GGAD3(DoV31h>8*$A8C>n?> zP(wS}xGQ_t9BhD>uZ zzmA{QgcD4Z&nA?di~NgEh(B2fzhOeC=V7TYsd#uIk}nsKz#CB*)7q;UnyEZ=Cu`R0 z*(XYAq4x0-Fz}4f2>;TC$&;C3)!LmPZj&}tcr=&fjh3lsAz1~gE}BAh$3|oRpVMo< z=9E7IuX_^)*TT6+#J(Blw$Tg&s4EN7ZOX`WHx4gG*DJ!x{CMpebp)E4_lH55XY(4u z1sLBG>S-MF2fPX5fx&*wkGJhV!0;Cv)2r9PM7#M~&oxp79+B>Y}Juxh9XYG*bE!p)C|K1bVvzjed zaq^#Z|ATd`@4eg(q0r$cfty$wvc&#EghC5iCT}oQn8<+vI8tHP6DNx@W^H1+$etlt zL?+$^fmG&n5!9F*BzP7gtJRW_p>qBgIDyHx7ewh1C=d++7*3!i57rX&fkimrG!0Lm zZwA|dJOC$A27>uAly)|hhnYUg(T3JQNV?ka4=5#xAz$}42?9g`s<3pt;QWC>~S_aGwD$D1%!K-9sSs+oZ(G)=jq6Sxv z24~&wCSNtulpMIlb^{juMmnm+!CPZ>y!n$Id$k-vERbQSwd{nBr7y;EkDKJDwA z8E;sv$DY4BYFr$?a&vE;4X#t|`K6*KgOb_-=9K!dg#L>H*=_I7G0q+Q~Za zk)e5Z&%9B1V)#n!=|28ctcnizkkR9}SCtT7({0_qt0pWFhh8i1Ux_-s+FQ+NRo_t6 z__lW{Zl6}hy<<=IgF;eQyz0G 
z_;xX(%X-$;PqgR&mz#R8~I8-zhx;iPqHP~01Jh+IHGi5HPU(5kJ-g?ZzH zj?-R|pEvH=td_8q^_4w1@0ZXYc6Hqb!O9uqFB1vI1s~G0Ha_3Sd~$C4)`N~MJQkC* zy9LUpyRT(oWsGh+i=3w_{k>Q%g|^I5PF;&}^`b`Wh*bb8Ly3nmirPqU`1&!HFRO+8iLS8oFzR$W+4)4Am>Li1}^ zPFpSt@0}%SC0W`ma!3}-GhmYx%eRZhF%7!FqCHt+ks18_QtHokt-nRCuYUPHh<0?a z)BfwII3jUWa~EeYSc&oH(-|*~B%L{HyU$zD`3R=UO^GMeyczMGsy% zA2bN}y%(0>y;OUr;o|Ye+m?Ert7WGyWWIZOJzgP9I8*+WrxEP3XHWM6cl-IDDyL`u z(s-Ms)$^K^(cxYB#Hi+yk8Arsn;P!yyOP#8deC&~al*4SXRngC3DWKBD{pD0*0|E=12PpmDmKy|O#+AP|2|NXJv4RlGEg|wj>Gt>LyAG1$v($#aZ`i*6Z}Ink=fq5YE!1A1kJR9Is2mr0(tXNHkJ}4IFdPEXvw{*e z&g7XIKI48t)~q3&26nPL$eWb8!B7%ALyke-C1+V!La&!Ipv1z$JRi2BA(dHkUoznV^`>Jx zkMkkkDYon=dBjIF9Lq3-s$*Gp4xoh4MUu%LrUUPD`pq(@E%?_zv2bilR8f$+H|sxCLDQ@Ry|qqpzh16wY7~i zHEZ_`Uq79`HPzQvo_v{i)Ox|91t~0R+%kZVjdiKGTHb*X{3&;K0vLIGP0ck@O*8%a2-^ z9+(SW#8|JJ-C;NUtTNyu2fU;-4sALMwh=VZh6tLqlBPQ65!kBJZR~rL)~W1_oDKu` zQZ#KZ*IAvR6yS0n(RAUOm}CoD0L}>0R1EHd7XVEUXvz3#FgsngDjPLq9djjgDfCnk z*s<>;B^^oJ&8){vDxDGPuJ*#>KnL5%=kNuaLv{T+^sul+!uY-v{K2udH#!$HzzI@8=1F0R-_&s zHo8T;klqv9xA$>lhTkh^S1(K_Xl_H_Y*8Hae3N~iFT?V+J@Y-zUwB5QM5!#GwUN4rg%DI&g+}U2 z4S8q)J$><%3MXvvI=@0{n-Ufhk<6+Mr-st{`E>=1guIMkJ8x)+#lbRHs8>;`}%B1%~2A( ze!HkU)IU4^(_V1sgHL5om~TznmpZSC`UVr*8GFZQT%(G;b1qa*E6>8%R2SYESQXIN za?W>c`hywf=sf5zJULRzFn)=&7Yl^gkr12f0K9G5Ezd(BId!$%4ePXCSa(UUb{ zFowPKUd!#n{s^)Bu@;NvwmRwWr-pd}uXFFAJkbMKemS|KE zhG{-YFkp_JQn1xb&7n0YtC;5bS}kIf%mlJK_vWw_l9bbyz9sFeE#55)^gQJM91et8yVmT^cm(Mv4!yO%;pSqXYLLrR*U2^t%E08V z!P(Ogs8c%7p+dXR-TMYWShUbD|MBzdF>K%Ma+Ltlx6!4Jkj77lH>R6YKD`XN{#{`* zbD$Ofg7*5K1LEKIUi!1~$E&HQ4_kvJVGrAn@k&p1$36&-Ip1koRlWT%^mw7Rg${w3 zp34Q8>CA-`;}Sf9+Y{WN*oJW-(A5s=JSlrx*;No^A}+PD8D~F-zf61RjYOcu;y5-f zk1!$cHWV>r;p_CpNCS#Gz(6t#P{NvZ>aEi>g^ZJ|EK&RuZ%vrPkZ8fNdWPs+#A8j31!+JJlE0oG% zxpB9hf^BcngQLPK1ZSZ&k&co!AbgV8@VMrQt;EiZTOe=N?1jnwn#(iYg7Cz-yPW2d zn(4zmh&f;xmA{6Z9EHgU`?o7XWb5peGIQRKhlAWk;XPxXoF6yZDcFHYNH#6=FE6c% zNNJYQ_#0rM9a$tYv(zp=2Pl9$su-c(TZK*v5o8(+Lx8?7o_VVh z_jlEa`OP9(KIJ$qm(Y!-WWo$1INf=shDmQEapXz{WtgaOdHFx7*#9g=)Yk(LZM?^9 
z&_XaJep}QuMoS6*NA{j8Hrui{6ofl|wW#UZyQ%1BWBsn{oBsZLQap4dc?kOM3mQG$p zjp_3Qu|KYXohw&BSYbJjG+B>0cT#TH^XwGb(qNmp1D>%?Xn!oE*x*hHnBt>in`w}G z^Mj8IbBGYRrg9BVk`7M|WBAg9VExQh^rfZDD#urJ^ zSSh3X9N)0HP6TB~jp*if!I7)9RPEeU$C)gE18td<)EiZ(Ozdv1tby}Q+&K$DIZe2!{yOl*3t#5>)%SZgV;@BU>8I1 zw8zyn*%ikaSj=sFW`hRw-^gT9KU}i{! zRthqXk#vN}%=^KKm*4sY)M<=-}RF{As=q2`_4!x3V3g3U&)qL7@Sm{Hfd(Dl!yO&sn+ z2lqN(T(P~R+C?MjZEnpk-!uL!ht18u{?9N$O^RcfdbG(8dLP(r;@Q7zd4A3Lzdo4; zjE_mLaPZ_JcSagf-#!0_`4K(~oaMV;c%u~#=m*Q^g{6>$HCN%rPJZs!y9~>@yke`n zP}Qx08U-Eyi?8zxYI6PB{gX-(0tN^WB4~hsfPhazP{0;KkrFiYB4`qdG!X<5>rR0X zASj)HsB~#6&4y)%DkUf&%?=`nii*8#b)Wb@bKW`cJ9Ey5Oui*E`Q={Ez1F(cwXhzL ziy!0H*RAYIOw~NUuCQS`2YYK==e9*=re*v2lH(y}N=}uuHKSnhP-JIO{uO8kn6We7 zp8KQqOXc+VZ)wiIvZ- zT~QxucU*7&JJ;9A?^&PVQSc>VUIUPWZE_otrmKVstGkGnA4()4+S5VgZF-6s zOymW*$}vaNNpJ&zPeOni01Qs@hCp81fH^8)myE3A;ZRU<8*LBUw)s?fOMpcPDO;nn ztJ@p}03Ub(HYRtVGrV{j3kVq{;M&7~xCwqr)a9?%P@*;saqelta5dBdz1sXc(W`MpDkf(CUjh&mO_{g;mO7iKoQ0LbjpV?9?vwLVMuQScm?qUI7kALwwS14 zcoHIqx`~ub3~$-_ zo1K$l^PlH$ijVD#>6sbe;A%MF?St1l?4EgtGs!UwcuN@B>?tVD9W8B9%0t#BQ#2VifK$>wyFr0reDH@@DYvoFi8<;>-Usk4{TLta6Eo>Q&{ zrPe-L!4|_fA6u99Jsv+5Ra>R7K@v_|(W?M)x{<28@h6m2N1CrJHl<=p+^2lML;#8!X2#a3(ox*mJsFbF z$TePf^nI*qs>g*iEr6oB9w;o16jX%wDx&Z$4eFOSVx2 zFFXXLi4?4_lajCpnB;7m)ooa$uY~DKmcy{(uwE2%nua$9%>?kH+a1h_2&4ra2Zd-jzSh;&r8}BmQa(t&=Xg?|P}ld5$zdiu=EmusdPf&0<-Z zNyjWB75dDAmH$9wxJMwD`;5`23lz_?D}WSRpsDUIL(XjX6AwS~IPI8f5d&4y@=Prv z#CPoc!*}^$uw8`ysc8VC!-oa@NZMGA^ZM0A+_`2Ont8JcD9TQ4x_@W5ne}jhKEhLv zSHIdBX;TKWF}h)CGjra$>mcd^C}T!eK1;uxih?jva?D)KkuWx2rtEPg>V5zI_ST+% z`T%VmRa_bhfRK$h8R$}(KH(LW4lzt!CZJ3OHCNa#N2>97>kJ`)KMVE=u7m@mBwG=x z!|6%PJF9@G#ea#JG}H~wRDI~Ce>QIAQj&KUW<>xr4kWGBKp$kkA2B+ii&KS#WUY5x zXpEeJ>4XmKpVhZTTO8V1|1?eXE!w5otrtlv_Je-kE0yV<#BH3Q53z3pn5pv8`A!3NJ84hkFkJSU?X9l)8Y&z48N9xc|84vE+kV}0um@U+V z_c}OlUPIvV!a$yEQ!m}oO$ilvO`?P}!$}pN4`k#ymWSa$J0wX7fk8!yBOn!Ldm=FU zFm!~LG6Afdmts*I#aCl^h(CO9gnq*KKpxLP!u8-M1UdC+gK`R+=&nhxMMBm_lDD&S zP!bwW8EjroBC#PEA{bZSR?s;!>ymST)Y+Nj0;0$r^97My+(s{#bLM|%)}0(qIDP3h 
z*Zj?$?v{->->9SGDnZ_;nC-sb3JUMvq^#I~cXwdyxW+bj_w1KG6TdhE{t4&figjPs z#~1on|NYnH12G3JqAyv7lt>?2EWJs-V0TfJoLF+3c*WvX>1RC)i#zjQU+=6bFZy-r z$DPkh@7!NRdk1DSpLX?q{&;A9mv>R1`1i0bF0x?cv!A2`5!RKm3Q#EcTM5QW^jNl; zk_aoD<{Vau!m+V{LeD?O2|%%D&>#Wntvhl8)P&4IM%2J0gU}T@aiAuYjoL*B&`ZdY z`T)GL&J+t3>Y;V^D4cwaaIc~LwwXxXPS9Dmg6llG7niSdO8trl#tf8J6YMErxZ%oB zxR}vj+>IGs#L6 zoUVxm5+XOlXadFn-Bvv-XYig=%SA+<7<+#+yPYGNim9C{-LzdL!O~9{VYE-e-EDAD z_4>;@)a@FZU-WKRZ7l0*8=nfOkB-o^bK9%-en@pSq4}oTCsTth;pop6MhP*u*RFCb z`G%*R>A&kfo)LpFXwW7kV z2p;=Gd3DGY&&mJq{R&-&0_GqrBnW|31*W~RQbI4s;|L54rNsWX)D^B!z_}96!tGDm zs1tMnwBTD)X_K+*Z{@JTj|OY7ED6XO?&cFD0k7KhR3@&}z;+qDvNZuF(-EE=-r+s_L0GED!j8B!gHr+7sLpNF2oymTze4)$LL_1w5LjCXb4wW! z1j4=Nl~xeM4#8OJ7OT4_GW(hssPR4e_>DymtGm=Hka)3AGjuR+eS8h@!#yi`9ye#O@f} z_y*HEf#!XgtFK_43~_znudCfTue@y3ickMc*Tb|NR?52_R%p0l=kBa~UV);y1pmkB zSKph4$5`bJFpKtox^=jwBRJAPtv{L|xncR+{8CE?6FOcW48C}F9D9VcIB1%+lsW)A(B4lxl$v5*OvAY#I zP($cP4QEB38A@lc1A=L52rN>*!YqT|10s(DJ3I0$< zsuC-A+YH0jI6-nYVWHAk8|flR3Mb^Fjj=o=V8TKdctuJ|$|I>^6NG%@nZ@AIe0tmz zgnpvjx*p5g&r(r+ege6!Zdv`SX>83e{yv%23aa|9ubv^KeiM^^hM+IcRqjQi>gG&b zAUb!1XuBFpY#hU=_1QL^3=*#|><{1f33x?)@@rDCJ>bRT;j$)t3p0m)B}15jQxj$H zM~_F}bk{js6;A`;YC?D!1E?jL1y_MM(^N%2sn_fKTMHKxN8o9 z=6w0Nn3mu&qetcDg}-C&3@_Xm%ks%=i2krP;ji2umi_KtJB`k7AV;yROhcNxdMlF( z(BSFCEZfT3Y!B!9%2Cswq1jQVEf*h18%rJ_Ot(uLMqP^ay>0G(syMhc=CbZzXXiyO zb;)r-|5(2`@|RiBnfv<)+Y)T2ZfsRrZa0Zka!zsND90-$eBo=RsO|X^$U52RHpRCN| z)nDJ=ME%eG`T5?TwvVV^sWxKTSit(^mzKr~cMFdB<~QjpLo61Lo2uTLUv?|Fs|49n z5QQypQQcuIBRf6c;hhujO1CRZ=2abZux@NWGq8dQ=H19xROM(|mP>+KFa; zC-vs+C_K}p{Na|mk2%q`t}$oc(#Dr6vqiPvJXP|SXW>qq%j+n1k#}-f%n!!J2!gxu z2`lD=c~=)`fIqAb_h63rZK%Fo7|t?OA49e%38*cN^pFk~uS_n{nIxY= z&4^r(A?K7rRPn6!exVrz>U?6#p$(ylDlQvrWC<@b2Xr&+8*EEQ4^=AZOuLq4K%B;8 z-7fhBcXLmoy0zK^g=!1jvW3OhOtym$Zpxh=CZVBY@&ejsvDLy668KmREDz78uJII8^53kGOr%=E6|YodK;%Z#Fdo~D04U6`iXJtsh854$ ziLaFM2*C&b_xaVe9d~*AVR2e8cxR`d<2lu}uRxu{g&77g!68P4Q7{jVMnU3h8l2|W zxm_mP#a@0BYiahw|7CFgEU$6CquB{jb1$!=PAPAl(|E_LHc|D@O@q)DDPzaY0`nH9 
zpv35k5$YU&uHm%r?uAvIn#(!im6HCz*h0R8hbL#KO)Jn1-dU6*f&UYMa=DS2?PQ_% zO{L_zhhm-ViRsqLWV9uu>j)_(5;0#_=R z9PxQP?0pDc2|r8;cz=61=e&sb$ngJNl(MrHs?&r|PbTK&<*!Yf)01hec7`K(sSZd; zB4EGF#$zB|Gd2MvFqO=$RfhsNP$mnO#j9*}B<0kRngz1RI#oX348i>q@= zfx_uCB0Brkh$#evvW_QPnQrJY>r!suz7{yrCP0y~F717&+#W~TQ@X3BXg37~e^9`z zU}7;XWk9e4o`kDaetz!631mQOD4o=u_Y33kf zFK_sex;DoM&u#m3G>UG(3)04>h;X}F2oX59k|yghM`48y_SQ?6KYSJXSle-{kbc0# zs(tiPUu<4ajVbxOQ!137uDXe6H>t}E3%ACJkih}VbIQYO{a?N%uZ<@Wsr-v(jO-O| zyQ;rhKFf_5QiEV{7<1AA<~5UowVAudIQ1JasNw-m+}Bxl{c`LQ}}GnS*ct zVeWfk#)FkitL2EVcpuJLUzhIN<7Ag!h&z0FnF;dJ0K0D=VWi!ktUCFRGCouLpHOp- zQV8l|o05^P3Ew7ihPcPR0JQz@9|pD6<_hIV;Z1UKt+`6(D~(%jLMgE zg*-PCN|Z!vXIq0Z$6O&thtFxpJ=<6SZY_6oScJ>ysci3X3zZL`|r^t zJ6-gtMJT$d!VO>^U59YX&+`1`zZONvTTPQZRuw9{s&m4YPgv z+~8jar4Alpteweu181ijFNL4p(hPGd+5+~{G&k!}@OqT1Hh6@5EnFV0E%8!8@`b8B zR=BSB<*_}ps7~Idp5_}pypeADOt&oZHtrL4g;Z4`C((e){LM@s(owtcuzqCQ>Mn1k zlfssIA*2(g?Cgz%wHY{gGqnxZcCk&MY}qRI)pY^gU?fRN8CGo54mua57d9On5f&cZG6Vd|#j5L^T+bV0DGshuJv^Z=b#Zmb?bL+Pu-AktAAl8y6b zJcy&SsGV!BN9e*pSt0<7R^g;wnE^B#itmoE=guzR0o;kULJ#;5<|N{UW2i3Lm(KFG zx`y|TD42eA_XI)==}z->bRpkZ4m#38*y}_zc#{(FY$@qMEN5Qg$hEw)|D5Zs6R&iW zFS=DUD%w($(Y`m2quQMTdOXL{4%u8&7F$MHP=TxNftE3rf`+6D(DkP4qu*j# zTmb;Jmt!5lz0FrC8$9=2w=gn4lkQ5rZ>1a@Myf@j%*Z5dEtn1*lrdy~_;!Ym+kq8N zrU5%Af{D7Elef6v?Ahc+y@O|8*&VG|9C8(eXgePeTyLqpwAmeE7S4RQQ_`DI>=r|< zp@!f6^sCtEOU&rLa}v+7Zpsy-3)fq|JEiwtn9RH5o-}i{vCn}VdMA3`F9Q3hJgIC~ z+eIrKk6<0FdP)0mTSNJsytf_!kOLGY(`wu7QDEtGaXFu}(kbte zVE&b49b9&I)*E}4Piu)5S4y0*bI%EhgeL9Zesy@2* z?6`$p+^0(FyMYD}4-7$}CTug&7EVx8E9Y;dF`#gp$k6cVJ7&4qW^WtoqEjhmR-1PWIie4l=0~ zb_$+qKqN$c@M=yc-x~^-IVNJD&JY7qZgzjf7R?d@JtDuPaK}PFs$}FXx2R&E%vegv zQSEE%GqjURL=n~*r^iHD1VGdmok+ySvb_fQu6<-3LYTrdg_5FqJ{>9{tFB`Z%ggBr zreh;r`eQg-F%Q}q&(iJU8B{#)=9Ggoclti+!g&=fv#uH4PSDh2Zrc`2lfD1&{dwT1 zoi6Y%_)|G~26a7tx9iB$?x~%2*A9eSIYV)f^xTTDa$lW2SYNxBHD@k;f`1>^{ULDr z`OvE$TR&I$mX*GL8RkX4XQ1Zi)m`>7>0!~kzVZnbVUfY@`edczx6tI@kbJ(FCitR#skZ)l0vrN~;{`^m zZjXO^&Q@86NF~cs9zuy1^7Ly0_X5UcyX%`o??&R?9r-*rK64Km=6FyYb!Bp(C9X4W 
zy#7c<&#mY(*PZc-HBL_4X(fJN+an7|?7F&V4 zI^Dq?3R$n4gC;2C`BD>_N45$38zn>(Sl)lTr^--?-HvLk4i9r-5TM?g8+W-h!f8r6 z0TZ5Nx@?qGBINb(t-Wyw0q@cieF$KWgzHIj2!xHR)TNKEhUUDK@SUC1OV095 zm>nK?_YDO-YY}by#cWRtr8)tLZQ)F-RyI;wpLOXk)oEKDedpk1>fVO!zUtmEw!1C* zQp4x)z`^jA(rCNL@dfo4hi{r&=@Dd1`a?ZFUbb4O_4g`wnY&e+ ziY`okzPQQK;@gLAb9Nm{#-_)kZ}Gay0mb9fl6LxuGQ^p8;Q>c`yu;`k4C-a)7ZTnx zDU)fp@;U~Ll68Flr#=Vl)dB~_=fUBVzF<3Ghx6DZ?S_Iv5vg5d3MY5X)zY#WmVRsaI2}ci?KC zjDJcQ&l~w@$V9rMv}@+2MTc_*O`2(Bovz0-+^E9FR6i)!0UsA85B)j6?-R(jj}-cx zNa&36(Hoi>`>&yXv^}|;^VnN2a>GH-?w#T@m&1lWn?0KHDy%YJyWTgPYj3@lUicvJ z!>5y;(M>l8RzZKXUtc%(>hicAWaMF4pW3h1H6u+!bh-oGf^bf80md$bTs|KIapU(m~u45mRZQ)H&Awss8jsI-A~RP;_Cn_Z(m6b79@P|A@NLRjkP z`>Ar=Oa3Rw0FSN^Tjsz<`3yLPHwmHeus{Y61EG_l8h{iELBWs!2!+Y84H|$8;GlTX zn~{Vxj@EMIoPMY&e>Rm1arN+d%J1TV)G6UfBHqj(FVN^M!BkLNEiEt1-2Ekh+HdQd zs?MZCf7~j@um#YM!V^@Y0py}SaidYcbwP?HeXzkiM84 zmJV=Rb12*HWAfsn^;Y7L{Ueg=YxQt0jx<;R-2-uW6=M{`9K7t4Lp4p4=LxM=oy0xE_>6!j~+*ExWNF)*k-FI^-7Y z!|Br4dC+rPl;6=~Sw{DxpaK&4Z>-K%UxRNMTiqgWYI0Av))MEapE>Eig(aJ_pOgw# z<~@v%U+qNrTaHh~yzH29P}oe|c-6wcnsrFM%*>86I7VVDB5p zOY=uQr_al?*7+S=7t%UDa-t^aRc!YD;6;26S_|}w>$$mul+ngr%+MrT~X(fOp zA&m~Ms-EHYg8W$BILfQ%J-m33GzB$OQM$X*-~nOTk{XX&kyD|c*5#UA+q$hHsQ8`_g+4_)nn?Zj zj8d0j_2?|^u>tMN7AhePG~rn_d98w~{U#hVcN=EeVw*LvA?nOjEllNoiTDCv68?M9GtX=;h$pHEBh2Bo z5RqlibF@b8y*{(1ZjAjhZY#F<6INU5qOSc3ju6=AvX^|&MnKLVzW*Mi*;#PP7J+9R z9D^;^y)0M4V{w?Br@j(SJw7C^4qbJ1ehOqFAiyjOD^_xkmmNDMRfqK6XPImO#VbIZ zn$Hj!1-aribA`_)yFk={i)Tk=pTVNQ>-RJ6P7Td3zh$Y7iXXO>Vb%wUdzNDY|G(2S3c@~$7gN5A`2WVCtMem*$nIs!mG{+kz#5`d}SnLzNB>CdIjvibTqU zoPav5y8kRaSDHq<{E6kAvHp=_~HfIPoYAQ)(J$67DgX z48OU;F5az3ruxgQ`{tBsS%xz{`D5YhyrIJ-X9vF~X1c!`;`}73B5dc)SDXRvh!e*w?+)g3>2%h!EuVfyIMn+v=KA}guo1jf<_3~1Q&`7 zMudJ2omBg$0JAlSR@EbOiMA2=wZ!ep1$AwNMjsAVmydXp{7Jl$$kbF>4urQ2se&k~ zGlx?$bFh`FYf94O2f|7UGuUv~hO5YvhY4X~uJGEq+s^mb66j`J>gWmdLcwaCB%g?p z?>tz92Bt(869(w;fW2ihUe5!N&MedgK8XnO1q-*5XL*_i-esgIJM|G0HHeg9wj z1K&R5&*aBm2>$v7URoh`_^OwN4bDbLgTmHr{C$rIjWgdAVn6a~;Nc4o?)sd6Ql5A_ 
zfCz;p3TH8I)+~;)D2Y1PJ@(1?f{B+^gV)!lYPnczAh_m(Ol92)2h25EPtGcY6;hZ9 zVfZwTfT#2m^vhFw9M?0Zi=AdYWies5FF4;iFJ>Ef6sarJ9b-;}44)P6S28(6Go_k* ze;{witO=q4Dlq_^G101}Ahh>DC%+_2zmp`}(PEEVnjt0Kac~;5Gb1^LYJ%HgiEx%P zIH;1}+;WIUgM{K)q0|VksuxBTlrB^#hTcp`v+#znPk|^Xm$jb4Mz@Z&R{#K%kDdxg zFD?azMgH1s8HY`_tYzcO9;cHpJimfXCyXpZpD?L8 zt%71aw%A$ms=K-Oa3{BW<=b#+<8>Ox+J;k%kVus4f_G?{%~%+Bv>?sd(g}x>AWf~V zJ|cN3>R2#T8nY*94VFM4a^h_T_VEQzNhoP7Jrw7fiR8$<@rs&7Cng@04yMNQJF~j_ zh){wLP`h+OCq1T{W!@A1d>g_2wN}F9bzDlxsp-N0#OGwJ9a+Kmr`~rz%HGy1H3sD+ zv07*3V`H1l?c&#H7&s(HL9J@>|2Wi~iHu(V`>JZ_hnu*8UmY`bG~1PN!|3bQHn!Io z5dl80KjLe}7FzV{g*W@{+aLUukA`&{i|MS_b$bHJlTanRjCQ=&AICFoh=!Gl!us z5<7eE=})yX@%qYRY_r-l`zHM?8`0rqO3lUiZK|pj3hRsP{bXEf!2sz|^7$1;5A6Tb zMG8YgbRbA1BQ?3;^1>QOD_gQk6ylE!?Kh~^DA!d~sE{mJIp+?N4VZ(G8Vp_)f*fu` z!F)0Ah^@>5gu>iP-vjZ5>ri;FLGt8d)ZXuW{j`}NegzAj%>$In!}9HLIHE6+2+HH1 z0#ARw36Qk;*01+<5dVuuQV)6W@xfbdIOMHWyMD&o!*Ih`Rg#WvPA+CtfLR&kdhNm~ zoz8R%sW@kUPz7QwS}6lgHTZI#!?I^pZPd^-_TY@d{48zo>}w*69;*`Etm(*CR3Gc0Us%Xf(Ow--Z1*Njx=$vb&R<;Il|YL6D*SCl1h#yp zGtP?8i)G{P7mE8ZhJ-QzrwbC&EakR5`%q)BSE8jFHDPcqOBIdl;^|9V9B=^-{9)s@=x-A&hLQ&J2L$%?MCUY2m4B( z4zyAPWY1FPLfkvu_qy(2hf-$g2H+ z`QE(l9Q057eIu2DL4P++l&fj=`X}of-t4I}$DT5ayfJYpsu+IBBm%=tGogpxMz1*p zCkZpa1%(Kq;Cx5&rK!5hudm)hZBsp|MS-&N%uL$#5k5w=Z9?s9`S~!fk@(M!!Q z?=;x+iLe|Sv;!(>e?Hi5WjX;U=&;%>JtVz#gEGgIFj>%l&D_b$ zyLe)cWhe^L4OcmAiB>Ag>3+ry>mL!zp!BQX01GRj^eG5)sApbdgL<@qXH~dH=9X@d zO6m$tTwj!J=S5Y=4$(FC5M)ZQwLCd>g)#=~OeU^Wg-8_2?(!tH3s^UtLBK}_uGYLF z9ptF2P_xQaY&3{kaZ0E~&~0{Xx_imEDHL5nE?K!dGVX>Owf_&_f0CQ*bV>ixZbX=0 z`|zl|=$guf7a>uXZiifW8t$GPsN|x3pyj2M|uym%8L2@i*@vIk~T+0jwza=8hQZI5d|nppT@Dt@R|;FqU~)JEnn zzba_$b1>(&Oz9(}nn=Kay`IltpDxP_`&XF6?gyMXZD}W>>`=ei7x^dH_at1--_SE` z9IujnFAd6W0E*Z&poi@Z7#Gkh+82;#OTs`Yf zZv3!9D&F=e`p)aXD2w3`G*nj3V6l9Nf<%N86EL9Ulyxx@HI*6t-o9da4+vkO(ni)g zCoS;W(}oto7ZQD?;17;0P|8}^=}Kh!LHr4Oacq{23CL-Nq9jBtrDuS5LZphv1*)VL z^NKe+yIw0_yKa3z7jHYw>V`=lMad9!Q;B51FfrQ|5&!v~z2!%@KYTwXCn@Sz3cd$Q z6Zc>KX#kz>}T?@eB-u(r`S9Yy~58RB%mRCayuv4E0My34VE 
zow}PQ&xB{@d2>O7K7Wi^A=D)LmpX+zJ{G(vXs39%qq>>T_g*8F3iUqxo?xCD~^$P6T)pDE$ zUEpv1)E_tp2)A1bJ1Ux%=@5S{&VMCJzMjHned!*NXNx`Olq8iS9Em4_`(8HirllXw z1{OshC-4xaB2%bzgWx?s1xE zMmOLFH=BLN=Z#mpxoyZt(<0M8DOCbXTf6TDu+=_@c$VdSC0GW^7S8c5Q1?+q>%Ho5s|C zCE35eRKJ#e|tP9Z_a9Z8(eGhl!k?5}B zfrn+*D&2X`D+9kv1)6E+=w?38+N$>wplrI?!f6dwz}_)S^-WDxP>52N6Wr7oLnah( z1ehtH8io)$njZDt?knza$Mk32azR4|ayXq@8F;Orb~5p@xm`}=8_M_X?ces1@LgOL z%>U3|(b>BQJ5Relcxt5+RKag{R9$szfKxc#wbImnKw@WBYTw1XW3{e-u42f8uzmwq zJYCZb%fxO{($YE!1P6t!p-%kfC)hL;)^Oh@gx%jJZF}~yrzSQd!y)F&nDkOQ$;XmK z@DdWmKzS8Zi|GudNS8*stk%Nh4G~r&?cElM6~9ktoLIk$zNqCL*?TvUuwHq2XyIPY zN)_ONRhjNkq=WuR<~IvUJWcl$Na7zB@Q^4iG(6VD0U3cocg%px-Bjwm?83 zBh`QadKmP3wqxziO+=CwTTQ=A9RA*kBn!41Ahh*-W&Gj$XMPY%`kKNZ?yQ?iIHq?f ziMG@~?=!lmfqK*b=qK9td+t_F9oKOewLeWc zj{jll?1{CHJ%3KW@^XdgAuYgsrZpqzl8ByBC#uhDEVNtQZN_ajGdbyLPg?67=DOI} zKr&+HFtuP!M3m;W4G7-edfa;Wx(~97m1~)4MUJpw$>YDMw|{{FD*^YKR~c5S-j@1& zW5|7>UG)7bo~Lc*u5FwXhbixQBP^X$H=OE)Gqx?+$Bigyc#RCc91_UuLUV*>sF@L? zK8Oq*0i&xgz(!EzbPS|($SmUu`FP~8(c$Zf9$~3KynhzQTk_cdu$4=7kHTtIm`a+)sx7yV&ZcfxOlP99O7)}KLyT^X)jmsz(DU{C!}s3)C(UOSaoY9(Y-#MI^>}t%(?xLfAoFYu0#gKlFUWI!Z%! 
zr*x-6|U&0UuaqK1mAbdA>QEX4haY05$MeowINUe1<8E6lFKDozy{8qn;6aJ^$ z%jcIY%uKEpGQmM_EAI?b>&4Yp#4#W0O{c2$-bZK=K$}JGb4sdBo6+bO$_m6s6-5>t zf~rE~017P=9|b4CflLwKVTMhIW8tvs2djAe%uO^c5ND#ho`IM!LBSyWp>`~sTT{L5 z+)FpNa*}L>fYE{*@%NUkXPF_)AlOw<9E3?wMd1ka%k1!qb28_M)2AM=|5|U~d$!i$ zq8{e3(r&JH-P6Js)g!-bSv{cM;{QG04vam>T-m4;|;p^+ZbPNV+pp&0@P-fkyM90J%U+T7=^WA6|bh6R=Nbz-@ zO2t65$Y;VyL#dq;@_XNoXluABvHNzFx#DOrdYX z)y?+~6PoABVd=bDX`-=9>jKstju_!XVOSwgIzra~)FW^ey#(`-9+VZk8xEnbKoS5$ z{GL9Y1c?}6P1Himt$CvW)W-&DrG`sHBD4Sj43?h8xZ=rJK?UTIM*UZqJP41Siv;#t zqhTI3o7d|GxcI&x6{tI)I&0m&rDRD};f-P55u^aIt^Es+l&e|C4z$;xj{fKTCQEGl z^+QI0s2BHUr=7p})V#jAU9-ggUiBeTGYQpPQOO;;8Ed!hWrW7d2`_2C#Gq4be?6wc zXngj@I`zz=^Mw1Ezfx-&w%kILTXYGvuU>Y|Y-&8_GuYjD__&8MDq#Qbbz~nlSx^<{ z=5e`hg~Rw7(?bCWp(m+XO=hl%(S#Oks{b}^#G?2pW)JC?-k!{vspHS?5Ys6Hymegc z=)6nV!!*y=##DO^(B1#%|aVV;K7*#e#{Rt zgP(tgGSUIl2U%+bZ?p^{6HpWikAjGlMA|T^BR$&MR`JYAx+4AQ7D~@8a&kUJF6)^* zltsYVkqv1adS|C_=cuYlUSZ!46AiNrpi2lRF(Yi$)yKR;Dud$Ma&A3kMcR@f^sLqm z*6Rfj%#%WB1rLf}cuN~H){~~7!sfph>=dcO*Y+Rn;sY~`cT`O3AgRp`CaH|GhA>AX ztTfu;&L53ItR|I4t()Y+&;a>Ci0Zt6$4PaYZ&gQ1$*NaIQv3U<>J~yIX`B_$JB&71+uj~^U6pRr zALFsZDM+J^3PZp~@Cyx5q4gA1)5YU~;GH!gE~oabc&l{a<26Fb87PmHh@_2YVZ&bEfIC{`kf4sL7VR`$O|ai%3wD+AZmdZ> z5&|h8@aYM%S%GYh^kKOEGiU@i#CM&dzCYk#lA1+*uUDl;*0Z^q8%kG{{RDdV407_p zYSlzyl^WG?O_g^129T;SFJdKv1<)!qKmwo{LL4j`rX#??3F~;de3ZJ>K&U1thpGUE zTO0(xccYVjHcuvAi*6)KO=t>r?+PW1p=)kI6Gm_N2E12{U5a1)`_63p091m_xeb$7 z!25HU_mig4auepAv;hY0=v_~hCfrbl#1i{Frw`7o1E&I1$g)8U^J?{NSU@8p{nW%W zS-_!W|9BqRB~`|dm_v-VrD4IOUIaFE(?Gi11UoRQJq=JW_Z;b{O-6tCen@Fj_?s#E z3##D4jX7{$6jHC-F(l^9qEvk;EI#c=JmlF8(6%Wg2hR!ZV2fx|sRbRhd!)vqa zeDNN=4pevtJ;C{s{=UjK>d%ErR`4x*-FnxJ8?N~pi6`sVJ#E}%aScfoGX=fDFxPeA!L%SQLuF(lr`#AY;mnZrjT&%B>2lr*5}V6cOwGb&Ty61Kd|y==`wKdg zTLo)o0WrS^S&}Xb^4` z#fS8v0!~%cCa9I?$ES$cxps9X^1)wmTpmegc=@A#R_n4Fz$A*n=`w|JYq_K+ylSmT z>~ziyWx_Af^`Y)bEw^FRr9rB+!qTPup6=mDhiGi)1%ty8LNQ;*(`{4X%K-j>PlIx) z(&fRF$+9nhc_`WJljy9AS%3cnue$Z3y*DcMj`Auf0?Vdv;N>t68=gx0!}r;NR67** 
zUs?g|@{zpuQU8Xp9C?YvLLZfSb$j^2S?uG*9Gj?YOQsf!jE-ph<`5L;^q!H}ibZOp zs76iT^BU)=B2!Z>{1~pF?xFKPaYHu`*j9Z$`&{#wfoDfb#IZu>GqxTZudh9M$Bl#< z+JB(Y)i7Zyp>Qk8c;CFIktlv@?bRogHKvn1(&y*m;bhcyuV*)DZ>%@7k{Q=3+vI51 zwFYY~FMCUTpt!*tP#;&ngb`4yC4d#)yi3`V)MD@<17SZ?p$K-mfktdKUWE95MryoV z)zqV4Sh2*Bg(I=lJ;(c&mI=9MwAjA-I>4M?3K5Tm%hhm_|4N4cPuK8&yn}g=HK2to z8a&Q&-)gFImfDsPN$t>JZXmBaMT-;bfiLL}U}x9g;6~^kFqh#EvSn3ZGDIIV=C1-B zfoRZ27Nsb7`U3!&P2eT5**qy*g9qY(m;6=?g)e}+@^T;<3emtf8!3+hOa?SxY(yv5 zl}IX1=_cOR2_VwpEyb$_3lc7#V`kTHF{fypPKC;~O+$P@$6*6+-cq?|esGjr8J5It zg#K*VaDV^r?05bx3YePck6ei+mY?he_ee%aNQ@*^!~YbW;vf)pMb9$emH*y@qDibv-mO~|S7i;qNB zFE7Lp+@06DKqKN+M1}eR?Da^boqjVxd4}~G3{t|e*1>pEaHWVB&{JmDS?<={$#O|i zCL--g>f<2Yjv865q|tiI;epQy*W=y1M2c&hR*yFvZm4S?FuwM3OT>yU9HWHY&(bw6 zdCTT08=48zqe*wF#_qcw)b+XOYLlN5PWa6gUtps)?J3FJGsuX_a3Gxe?$f$CYFA@< zJfXEuvHh7>4vvJ;cZHSYMGho~mLKN|B;oeZXbUC3Cv_LRKNib1;C^)auF0KA&plkN zCKF?W66}aa6!Mq0==cU|M8@Ta#mk}1)zPm)vxi^n{^x#`BDW(^6#K0bE)MOUB=3~P zqxxslzk6-YPJ0rSEgjBFz5AhQT-ZNUKGW0uXeRvIkW5C>ql9hNRfEDkUxXg=?bbbT z<4}|B`6E9sDbGT$e;9Og8r93{|17-b649}0PYE)VePIvAZx5lA3zB`{SQ#I?cvNdc&5;R-u1pROwrOQA34lfG;`DDr(3D_eAKWn>R`{9 zV^lOr@7=i55Bv-!;z*?@w==NYXfWeLN0|1{D!A^9p`9yB2lELX1DQE>i0#EjOg~M~ zKSq^v$G7&9fw+0%;pP=ScOxz5loC zijtCQ%>tAZQ5gm(n-Gwy5KDL7rL+E;6ba7RgiV427CmsxSs|>Kj`Mndb8=$-H7tSFJH2^7URxd`{TSH&o7q_?wj2B*PI1aBv%Y@p(Wp21*3zOJn@ zU@62ZgB78sc=at{XHIz!9)RfCS&byRSWqangbbc>E8kH058t;(lI(&Z_oo3UMbN?( z>JijV@{4yH;$TRXx^kQ@i5wOn%i4yJo0#-4QptcJsSc8X<_U`^ENp-^i}av}qF>;t zg8<86Fl5rhk^@dc02-ldK;Ojwnwp;939m(|-|dBEF*!2;uRY2lm#>lw&w-g;ONTTV zi5Q$z2~526`9?w=9Zw066M{)_G2UD&%z+~`YEV)`2!hz<7nltTq(U#O z@yxduf0~*#OKNzX(Bkmk?{GIKtrUgWc`@kbu7RZr=Ko^tJ)@f3zIN|Bl>h-kHH2b- zfB^yT6sm}!N(o340YQ_{i%1i(?u1?f(g{uJU8UNvgkB7aG{v?70a3AG?{D1ajQyVH zf5v&<^Wpt?k9&-aj9i&(uC?Zzzf14mwfcX~d&40Hxq`dWRawXh<&pK8FLIDlEji*U zxVE&)esKKt%U-#zcuLO>BS*BO^lUg!onJ2`-%xeVCvfcQt>fxtRbJ7Dg8Ra~|HhQ0AN2CyfMg|U2{YHgVT-M2_{ozRMjspmTN z91bZ7vpr|)`stt;}hYmr+`us+&=oIN>VykqB@-|t% 
zH7y^lO0SB#t?WCqs;OyxPsM_v)%{slXEwEh7(KV&$$sUEQh=_lal>>(gIMUDw&=t5 zo(N?>PwR<$14st_BFGcGkZ>r3R?{1F&NY1-&GNWhQEdJD*7DSYce{?MPTBVCwYYaQ zzj}tBXZC!D`24R< zWgGlD$Ei;PpJYoL&sWs$?)r7lU+Mw-Dyr_y%{S{nZ~X>ON3TMrgPCc9C|-YPq>R@# zqiB^_%`#gH-cE!Ro~yf;6zKLD@imck+9S!cdd@$xbjR7X58FjnkfwL{Hc0Cg-g+QD z{>@xwWOdUHWRG6`%l7;hmP>wrKx)oF#&O%=~c_0dRu zEt_guqE*rREOKG>Wb!cMjF#`R@J3`6hqU!n8XlclDlIiBK$TW$r^zcNsimfBUa2WA zllDQ0afVNbA+V$mvJ@oPKUu}hNi0R<5ST z4UQS6?tAL~==(i(&=d{)j^|E-jqakCKWr9)5YEXJIW*Vi+du!J#oDFcA#+JR?k{suvR*N*>%#pB{Ge8`c5 z$;kT>?ZNh?$>PZ&i~1~dqDvF>tcuuwuDSmarjkvJDo6tsZyHdyIN~m*It6*J~lO|2`h%OVzRf)t(shYSLT!=I-5mob9xT++%{(F#LoiT11-CDQnB?;t~x ztHEr0qwFKpx1Ho1fI2k1v&Gji8wDmDro`!!jgL_QG^x#x#4`ekG9b?ogj<6lii$E| zicDWiERt-nRTrXTTdtJGa)DhTv|c&b8nyK^Wm{#! z6XQbQ*SpT47IIslz(;moePZlDC2vc{wq`mhzjspmzp}aS=nGEy5{5vuCL<>=aYS0uCjSirp@^ zz1)&{fcCGx|8eg5KR!keGE8BXpbmKKAdCcQ7 zN-{)npNy5-I~v6?EEdDG1CPiQ;PWTIN>qE)acoq^n(=YEW{TJFs6HJx;5+c)Fx}BU zchKdeAo7mKc-h%)&rGbVbe0$$w<3oBjc9S?=2i| zrc(f8-%fR2tWhpq&D#i4zb@*o z?AsZuu+^t0C9ou+@?{!#>fP(ePygTg+JVw^*+k<}r{(N7aCX1$CbW!q`d|P7je`hg zfMawDr@Oyhl>R31N1wF;UB}sM=*JM627~wK9-NpTTj+aah|ZG655pDF!V>nF+-U(B znBfoV)9tL=4Fcx8!Hx-S!nR1UB-mGlRSYcoiyq@IxrwvNoV1~1@>Um5Sb(ycX`0d67xsj#}NcmWg%Zhxe0W^`@E~E!kROf zp~?JJP4QBoOm`Fb7B#B&6oE$XmzRc2K1SufxHQfeEEYEBM^R!{^=7i%<0bETFXg5^ z+dON;lpN@Ib&&qp;YDT1_*3x?C$(bUZLC=1(WmossjFcxFdbd*DJ#!gZCY;>dB0qb zV~U?S+$DTE5P16OA z!2!@d7ffTYVGTHn_h@=un;T~ zVZv+KU9Kyxn8Z;Q~YWkKuM#0e+x;pgWgq zVF9EoaEvjGpk-kTiJ?4T%8^hs)Rc;y3!jT08H+PsYV6Ass zT#B4&H_R{4(vzSkD9Ds1=R$pPW~Tx-9@uZFUeFHE)268SBAwWBQWD%$aC=4QAEyGYax^Eb4Q~rSI3QLo4v9x-&XVgD7&?{@K-T{m4%ObBi3EVGO-%U34 zmYjyltX5>W93_Dv9eKb~CNy9)S$yxypCqjIaltWJQ$50604@D1@}O&#lXu#^{<5vn z$*5LZmE&sf?DjLu*7ucnhBfS2J1p_cNGat}-SDA3+M4oe+0)arC6CK5MJA~{YbHRXX{iFS zQj@-x62+v@89FahdrA#=Dx*0_QJ3T8ox;bO*q!mpq~4VVfMIyTL5pEP&hKtjgA)eq z%;9unfNjT!Z6y{4DbP(cmk?i*3;iz64}@g%1y)e}Je$BmK&oJW^nDn9&6JLOa@Op0ZZQ<&FwJY;CCjrrn$FN`M_sWm zCnfUfG|XkzM2baCi1yJ$2oB`Sy82VdABD$zXj>r+YLh0(TmtKV?+cCaOIz^L3GN}V 
zkLLnEt4dA0l#(;EcNCKy5nSb`Xz?X8l)(cNQHA$oEZ4V3uQ5DS#Nr1w)E~lP#RBf-F)UIj5N>}$MRc4a%U~>5Kdv|}&R9qIFyHa-V>(z#`hx21+ zE2cXt6jTW@m{OEQxg6G~$`X1QcX^GCy{VqqLWPR~Vo3#PJL=##oh>>*j($(k1>S=a@W z6s7V~NM_2Jm!;>CIZBwVW#977r|~+fxTtDfffz*Ho2SFfAu~pmiqs7t(d?(RqAC~L zo}+s*!#Kz?nTEqk`k?KutjzN`eDI*iAANtsE}L3Ihd(@_%811WD)pt>fTDfbL?}N9 zA-5T(^%pMs4DP&OJ4iH`9I{Lo+=nr=Pb#`-Ozg_5>Z&SKOLrMtK^WS8ASWbB!X^K* zNvgivrm>8zxb4O@&V89&DYn?wVH=NuL_6%WzL8Xu<#*ml;g$Ip?ai4d*hs&Qk zKa=tr@)PHd&E0ZOeH?W0s<44P&iH%JXT1yj-TqQh_gM?<%G4wjcY8Q$?P|;QvnJ#( zGudwv9)0@T#9{rYx56`sMrnr_)=ly3beYfP4#j#L_MK}klBc7clb*Bb zG#N)S;iV;U>DF!i4%t}vTpuLMwpT0!@vQJiDAhKSz%)Tdwui8hAMFA@iZ}z3$uQZ{ z1<|@jtc7b*CbmgBtpPX6NhXwI`Vxz!b<0he57;=Tyz{QP<odhFp}iWDn|5!UqO<_|>W}EpHKOMMzWW|FJ$(g+-RtkKlr247t>1i<)Sa za?mV~yX!2zWg0y=R7PGN)56-B4Jg9p4-BLHunSd2T>)wuSrLF_F2W^w zED@6_7I(|59*?(xd|S% zzuj8`&2QGwZ_pZ}8;1^%;@2nZB!XJ6<6E6Jmoqn-Vi0LT>n@f7veWM`y#LJg@n-GA zXZ5-S7=E3)F8ql@JB_Cg5vF^#l#JKt#f$Kg)Ln!7GczN%(eKrFUOxCU>A=1o^^L0~ zG42v@poZPgqqEfeGEeVNaLCecb3#Q}@Vie1B(?Gk21-HbvpR7gHSpNkS^_qyCOlh?= z%VAk8cG&~tCOqTw`Tjr(*G>8Oq-q0RI3ne%gOi6zNDbm4>@uHCg9#W0+xvv zgu@~9!wo@{7@&aAcWJ_dlF3OB4#vBm2<1`X!!Y1ZC{YeC*2vnof($`_jyKm&*x0iZm-V-iFpNmi_9pZO;rTOq#ZZ@aAq~u zNPmza5QM}LjxtqJuGuJ?D))`uIr9Jnk)UL=6dyB`CJb6 zjK|2ZjWLQ`@qtI9ZA`i-YJFg4GgpL7?Wf6c>9&M^YaDue3T7akPDcaleN;ICDFZ!N zrz+SBs~D))6`Vp{9=yDryCqRD`=v%gKFfD7DpK&hf;a}Z6I`_n8C)~xx+Y?XGRVD8 zbNthpNR>v6zl_{)qJIEu$-KdGwVPqtGt8)$WX3~?wv1(jEt1B-AcBw@lPtFOu-Kx9 zKS6R>tq;js-)#$_nX`mA9d%YN6YHFaR|N;flf?P)Bo*;4QJ0+}KxuNd6}DPY`gE=y z?^n_yyQu(+f_Jbv1Vu^NyUCLx*dwCSnJ$4mpFi>Y;kdFX4R+=&G}-6=LC<`vEWjr} z9xYFUD@()p8w^Qd7r7WH9H+-ig4RN$vsg0Rm|+KS^n|v6^1<7BiFt+`8UO|G#()g^ znkblkP&;~?9HM^ym>b7b`G{siYfQ_gL0w?4n>_9D+=*8Ya_l3=O!okw3#3PWFY6%W zZrVNn483r%O6DW&riZf<#`lypAA3&djpOoP#|(wn&VyOn$@j#P;`B7HJE_zN?q_uS{-zpwp%^84rar0Pqr9zFVgMXn!vM6;+m zJ!Y4^$+k-@4Fg<{fs{>qmihdn4EmFE`zKyr4D5-9lxQ@z#tN*ai?V z6p}ZC5LLsh4QV#R7IcL9i>YJ4qi`z--XDCvBUa(v84@5eEdn%9-%$L;*up|0WEvVB 
z4aOlLC}|!92(22oC{pW4t})?QFs~l%&b7j=`_*2#^>3|M;jNVlV2`oX?C<#Aq#U9wFf2-YPoAY?C&X0hhm7b=Ik5_CmIBR;rI`E7cC}->N&7L88 z#oEYTs`u^kjm+KhDRzN(Lj++O61M*Cp!G3FBz>l60P|>wa3kiJ_NI!nmGvd|jg8^- zky`S@J;Ey{*;x5kQ;sWzpY+`B+`W6C)k(X=ZLX~Rie|%wn7u?^(c;cyr?ht;`)Hv1 zA$7Jath;+I+}|xV@w{>T;EjS4wU<~wUwU-UT>LNh(|iE>G_D^~Ad!#C7a4+Ae?876 zHX1`*lqIAkCm2ekHI-c)71+E9I>f~2V)FYmrpvoT(FV3h*{GL~m86)Ua34&5I4KAs zE15;Jq&AC*2=F-2#bXdAtBp!Z)NgBN|2Qu9bwq`*Kd)@D*u`%eKkw@XXdAK6j?%do0mc zJTtT3^(h(b=26vNh%b?#$k%g4Wy9ZKKDEYyy2W1uuPDYt=-Km+eUYz_3J%$nO)q z0%5Avy7Uw1HjD@zXBm&}dxsW1s<&xGJRURB*Os?1r0!R=E+Sz|)O0hFimsBreWhDC zP6GfLG?13H8-7f=C)Fh)>{B@ib@%M=1yxWQ(6`l~Z0J*^c0H1UOo6Tfk1}NVUV0${ zj6lSq^KuN0#X(76m*MP!mHZNnY3KSWjPE2>j^4GnqX~_2->uCMwc4kj!v#c zj^UH#_q9+F@2>^BWFI1WAYwYn>k6A(I+VWUHE~VHH|!TbkdG#llFF zaFQ}w99YFd8e~%&5C9)&^Kb$yOh9EgA|}vSL=Mhp+P?iq-=A@ELVwB6G1tb+bF$@h z?*knca#XLC8;R-W=kHoybb#L#|EnN32b$O@bC@V6Q+sgM$LbC^vGPvqr0JJTeC4WU&Ma=a9*#W7=AapeHh*I@#F!owiVAVMP|n)TO8*h zw}w1~FS+Yn;yVmKfD&ac8;?j6Qch4S+L^~PmPIzf@h9^!T4x^mMRknLHp@MOqdyN_lv7)n*?Z)D7pKfvtnMn5% z7$(t&d_{7|QR6ZL;{0iOXMfkhBnh_Ana)*lEkrm#4eH`Z2wf0}0`JR^yta@^l|cR} z(wAuR2}c7Um^9NzlA9-;WK#+0;Cd~EsAsr;+?bn8Qod#&>1F~wV$^h&9H?V3xInRc zMwZv1a@VO69@J|ONM8Ho7ne~mRWB|<;P zRRFz_ExXjZrg{T}chIhOmuL)|+u#zWxp3ZTY5j6fp@F)0ctGeh`B{*+ltaVWU>{LG z!H^R(#YyQ%z?9D9hpUk)&$f9N54X*(#>pKve?|B>R@xkQwd@JGoYH;oUC)u1KC217 z10J)X$3GbRXI(e^8m4+v`;viv+QEet7u)4cZ8t7_XmN)GJ4x(xfDR_S0kQEq?PD|3 zjWW%R7J7$M=kl9E5BlwTu~{#(vjhG@oW1KK5zYn|vU<#aabWeg`~q6^q3VLOT7^Az zca)-v_!-FvpXz4YxI+Rw{+5JRZFtmd;A@i%?L3F;J55%*e_ZWqvx(9Bqwnvy z8Ivy(2Nr-po1;BGqJskWcby(~DH8`y%V~c+ARXM7HW1|&O4?#mS(1T%V(akU*k#J^ zl9&9=w91dKANho@-SWOImT_)yAp=dFI^=nN%;Vgx{ELLvlMl*eqx?puSP!$MtjtAq~sWs9X~HKVC(fAT6dsbC%e1YDKz@Z z>**ojc2E4Z+xIqx!?Q%AIn?K}GI{(9gU!RyXIBGy{iOtIy*)*b6{4gOULjswxw%>p zChnpsnA)G(zgm0uM8k`JFRlKM6$Rl7(l{pGyjHTBw0D1*!S6X&(z@j_^OpnC;O*>> zBo^!)nQ_MxoEm13?qt3JpC>&9r7eTOWZF5AIIX$C(ed!u)_Es*Wv)vG%(ITPm-`vf!lxl1#Yca5Hi$iMsnyU+3J$mx ztvNMRY0#-2^A&5X9sZ=LE5;Jv6ssHk_pZuL1i<*C?|itf$y>xj87Z4b}AXX4DYDR((PNxj8(0!oS() 
z6#2_F&FoEqksY7n`1f0HS?xJ>NptD@2(ElYuXq65DVz;CKlSi04K(D*i7jj18(^YCg| z+;{c;A;tXbo+F{J3;VS;2louPK2uq`HuUI{(~-erKzmD0xscM;*`voNlMiJ%Otrl^*LxV75g$bxPJRKc^A%)PNsJ-WQ2y!YZYWHuq zPLr8AD5R;yeXKf(ZKs1JMXinWCE4Z*ji<=Dn92TFNi_XfI0h#8RbvS1_ylffXZ5e` z*MRe^aaeH>l;bCMNs7~|YNb=S3P>>+EJ+miTX*beGK7X_wPW#A7Oc$f6RX#`Y3gYNJp^b6f+zstQ9JCDaR z#y6b1vA2sA?*{Am>$Exs?JGHTY31y*uP-ZW%G1;$li(rY7N+8ViDl}Y1Qnj{pmu#qLh~b{I|Y+ zZvzsx#oN)6U_j%QcaF(gJRc-aSp=3i{C#W?sYB6_uKDn^6ud5owLWyJ_c(C%Ky5h@gbUiz(M;YeCWmJ=dWYY+xFg6 zJkynZ-EH@5mcV_GKrzzNuA>+&k#FS@^#@wM-OuH}wbBl4|M*Nrkteuu)I;XAvI{lI zI%5$JVS4KXZ0+YmZ_oP0Z&IslWknf=C1{DpG(1Kj;j#Gjg;f46J#ZKKDJ7fw#T-BO zYH)QvmjA7%(fPsux>N&@W)8^W$w@7i?d`-V=67&t<-CQe`KL`J+*VPGbapfLMGEPO zCCJN-;QjDcWX~z#6RNEFljy;|DuGLT-&iBAdXV99O`Y$Z54<%d2eUJnn@A@#3C4s{5$8#X*Gq7*yvq&!r2gKqg4p`!ccSb1Q~8CIQOx8L8P+IsHsZ7Z!t2V+*> z&z|n}DX$nGYPomLHRjp;YFy8oTk8*c_usj*SaI=G_~AG7@p{T$AkJYx>*XnIrna$o-xK(}V>1O3HOi>VB4&#BJ6+nBcDybqu%t{>f9J zQELHqaV#~RAa7+cS+0~Qcdr`yum~5F6cFVEWtvv$R!D#XgU4!Sw!3;UjcH7~1pluA z0U=x{T(AkpU2)+4ROe>+<=niVb6P6LE=ZgjHt=E0tiukR zMc6eUHu0LU0UOoHgIZ?Nt@dG#oeI$6Qjfisdr^EO0!A>UVgJYe>Y6DE_?>V9Ee)kc zEr^Bih&U=F;|fuoKFj@hy#@aD=G}9qq_l5kKMYi^2R$)%chtQ7{%4Urdeyhfiu_A3 z9c&-zZllNR@@#9ExuitWE_2*_Xzq&7DZlLSpV{uSLsP8XpAN_5+@4D<4>M_2ZlK85 zS6@K$yElEfUD2qJi3_JYXe|wFExOo>+{B)K5V? zL9m8%6m;V+g4!1AU?5+3&I0a$cKm~&3;VNiV=@$*7w&;{^C^0Wz^_#`tmd_4D9%{? zpi?)q>;jMq+*GtI1srs8>lQK5TykgFsWG2#yrzVV-`S&vX^$=?>w@ZEH9lnQko8M! 
z4*#Ps&u87_E%58iwXDtJh!bH01#M-UIcG%bUY*cNBAkn{Da(ktf3aE0#%3wH zX>Kc~%@029Thf?y7LTCxF)ZEI!(H1}C}JHCQVB-mJ;&k&@8n*jD6c#0dKqCCzxR+E z7qkED-8t;0$3&NBVb7Yc%r@5itM7leeg1#nfrmr_NWGkEApfQ{sGgTcF&(%~I#ZKx zGJk7|Tu38<1~gkxgw_ViTRa7Ed=?D)iy;rhF-#y13@c6r72%DM%7~K$l=-T_wFKA) zJO~5C_0uOs7Vsid=zu+94it7Qev|Tk4}qd^R=cEm0lG&=X5sRf@x5F$HIvvmWc{W~ zB=EiK-6|0_0yUWb+^wPic0lF;eZWdqui%=QsrP`RK0aei1({1nc|gjAif9dZrwu@zgfP+bv4O$;Qo}e zIo)6(oXb3WX^Z?h<8zT%)&!Fo${0#H-@KP~CIW3@v(H(RUeDMvcTB41)2a5jHpFqO zZZOSz;N*eTtDjr0UecJRhr9Hjiz-XT)W0NeRg8Wh8+-a@#2ur@doOy0l01B@H+m!L z`@&mYGq%Kq{wy)V88cZ|93h*JRb);W`pi9PR{AjXq@h~;oYEdG&cFKp-v=xJN!`Cs zH-rFCWNTq^8*SiE`vK%z0T_<4LRSq_w_%oOTBPf+hUc*mtM*|N%5e)M^ZO5geR+?oex_sE^!gOR2Yr9;aw8I24ji@VPcY$^APPY0xb zsw!%-dua1 zdnfd4LW@nE(4SV6PK5$5k}~=is?@tQH^D?&D4Hrqim&ZAU$NTwqpu)%#uQR?=05Pk zOJKWnGxScD3`^j>K*_($(s0L78H|N41e~N!aZ)mnHb$}rZ=cQh5~-SgE6_z>zwTmP z>d&;L!K9z?VKtvEWth|hfM=jTk-|XC7NdA6ze`tYc z48b^n;;~MH?%7qZjfQ35SB8gro&+PSY1j-HyB*Qx?&* z2P%_T=rWK-b79|Ja)0(@FSG*7KqE=GHZl2a6r(CWpMuJgFcaMeXc&}X3_*gYIs>A? zw&~p}K^@f4mGo_SD~Ha|_T4~OJ7yF8HPnc6tfg8g;>%eqhRRIZ1(M+ugoJ{u0k5M7 zSZ2AXS3NJu;+%#vYWfyjY7!~3afGAP0-*n?x}1T#yMH7&Stb3Yz4Nm)Ckw{2v!j*U zsUw7Lq#7W`f13dqlq6;kC1_&TLZzG7)*~F3w|#W9N81=*&}Tf0L04f$T_%dUr>y7! z@HKffA=#F}B5ldV9&=e0U3O>(s=zNPBn+IBZOyVJ8#4y9+}i)izbC1t9@Q210e^xu zz-OjL0CHq}EgJAh5Ag>Q0V_Fx>^iNcgxs9WDib4B+MS`1*do~SQq24muWniNgd`Xu!$?6dWdTDU@8OaCd`|GmwD`PaTg!_L7h= z8#x3FT9T47$Oux!GVx1&MBf84GmV{|)tMAh$rnIMLO1YWG9jkcEA-R)a@)QZMiTe# zEw3Y$hevBN(qP7UNbIzFLHx{=MVa9anv2|YNS0mILG80Eyg5h0AKLIT2mD~;Fii99 z#$C4@bu7?jxlf(h8WU5~GE}tlt|GzdZbSaN|2Dz_m|7~3#=D5lsOk8PUC!X7-nUZR zyeaR@{d#OcGBYhY2-{7ZXQO1X5-EylcIu+=D{MSO9=ObB!yJ?4APY_6wRQl2MzHl( zEg)>XtQub*ok)uwzr;hCO7UnalQdEavZeHNQlc%8tOYo;xT`L{%onSt;vKDzhKnrY zL%dcy@5gOxxEjFPIvhz45gXS16dF!=$%m8hM{4!!Cj^FXkWOxs0^ytN? 
z>W`TVgSulYhW_0iy;sid=_>4vKl0%6d)3%0d+NEvq0uA1kG=MOef_}g+s~q Tcl zHtiXky7n~wxAD)NdM2`lE2QI{YeV9fWHPPd%TA5GmUy{j&IF1fIQbw!X_834Tcd_G$$6&fb%A|R?v`g;jja7STHCZG~ zW+?RnG2MfO-|ixvArOPnLKdviVF;9EH7$DC*wjpgw7|Pnm&msk@~qIJXe~T-gZP4E zrF`1SMrZpaxS-G7yj75#Y%1e10k1WJPn>WjF)lw$^(T#FXI7DGX3TQJr2DeQhMf%0 zgz|>G^zJU#lFQg-?*{ieNcKDW_1+e0`vo} zx?^z#fAoD7g*G*YeLs4O>HIWykA1izOWPoy z+OlOV%y_oyfzDBl;cUUnn38X-e3$38S{FtGJ*XQ^$(gAnicV^zyWa)vikt4ZefOs) z_g_%#wcXg0*IVQtb0GbeatY$_h4Ht8(A6utDK$GO5u%r>S=u(#DWwlRyGP#}S`-%8 z*9BWdaqi!r;xH=vm(g;49(;YZW1bcD!Rl{QaeJO@-$NUSkY<;#n8^}(+yH<#y+ zoT|0|RN`oFB1#JlfapSR#o*LKX8+aqPyR(rszUt{3{7xE|EX`j5m*BAhb|Nt9D@v? z@HOC0l!oa{7T7LDLF5{cWMsQGgWlOZv1Gs(^EZkAJXdV!EgsIRQYC1d=FwuX;_eoc zG^)O?_({4X(pK(o3<}0syM|E$7mSHQa1I}CrUJ+M&ICZ1Nt|JlWZ9Rf3N5HqB}f0r zeQ%yhuoP5vgC3MBT{?+;KrSn65AaaSDsSk~ETstoK4PB7v6Im%*hh&>A)Zo97ug5` zoT)>uZ$nhsYaw0)NyvZ<9i4wMPnD5tV3KPwLgaBCR#Y|M-#yioq7ht1>t6(Q8oDY( z|Iv3N+QZbWPRQRlTQdK-q<2#A@|9IV@>g0hj=srgfBwPG<%RaK>9)^&N8Ir5O}A=y zkW0!OK5IMKI01wEy3HOR_;huhc1IvCu^kiyX*=z7bX$37c-ZO8rCpDjqx;XKDXQpt zDi&JN_0?bjt!gcv%isBjk`BrqmAg{gtR+^Z7rK2~k7d;{Xt;NxRFy;6l#+nKtDCHP zB@K$0rvc3&#mkVYZ4aj5>O%V?u4!?iRSF8BuOqg*DF2bPM-&v+oe3bo7)sVP9REI}3s)uP-T4vrLS zz%Zx`RhTV2$%D}b1ff!pn#JtJzAqqL9i^)@XbC1OD?m1tCbC0N47YI>^Q4CbTQre2 znrlP^0fY#W#m4%|%1B`37(9@9nI(bXVh{p}ZFDs#Tm5cfNMF`KMt%^pT!S6*1Zg5L zRzzQNrB71Ohe+8lM+ucZ^uOSAtRh|IpkM$(|4WH`+5m7&XR@PZV>l+zs>IisxNHe! 
zWT-8T&p=}Art^SwzL<2R12*D>a6mHX&SF}B#{N^Ieq>#O#vsjmAi zKQwI*Sy8W!f4B$cYU`~N>ZnKd?o7WButz-Qz{;y5NN6i-Z~BlZ#Hp{J{|qG!Bq>}Z z6RS8?XgT8r1t#1dkh9YTh!yQ=egR^D8pOzhUgiPelEOZBLkt~&3RE>Lm(@yy(aYitFMmx9Qp*^azb-wWPm2am zF&4`r0W!llpOGHV)W9>F3;_r|M9pa@On?yY)96cC_L6h8EXelIG2=y9bScx(71tLh z>GmBQLM@tI|73rGF|mIWxsQGf#kk)h0!J&r8UCw4<^)#jd4iac91hoWYyXI5)Q>;< zevJ+Ivp&aFIOMxh&a{mGJzsJhXCL9{YF7lW{=#YP6!BYS*S=D%qkf_l?Ol5ALAFEe z^Y!U=rkIwk`0eW5YwC6FYS8BgxBO-Xdp&E93Pu(pr>x|@Ou5~0*gW?f=5+CNTl0s* z>RTn*S3YiJzL=|h{;Bq5pSiLO8zpmi2&Sg}3;j23etrjsbvfstj~c7h8|};eZ?>6<0xokXm!5eIcdLGtV%T2Sl@S=sE#68Z+6!oy}KP9 zU7!ejBAKAbd^FEB3B!(5Lvjj{nnWza5G(EE!q4N1G*F?Ke7HNNgJyxx);? z;AGpR`!et(c0UWxs# z2-M8kIJAXH2@N|6pac49JqXQqip91?qzY99!${3+d+CntSm&-OjVeY9u4i98~x{PA!LZhnxFmIPKi?dTQ#<&8|+K0JiO1 zcC#~?7r)PE5AK|2%%!N_tMju1S63?izay-ir(U}R%$+Qh4{59kt~!@%x0v_T7LAXqCJ|(>OCkw;D=jN7vK5Vn0f*|k zwxsi`rqQB6e=GtiP%s9Cj!u!&K)OUjxPd1A`)(LQaiU+8BFq1p1&ktXKOG|nJ8D7j zhVinS?2V$5XfdQR3MrSgQs4?VvxVu)P&{G{EUD_W1(>J`W!l}!3L?)-FCQe!qzqPT z8Iq}5AF%l7{PGNWEJ(xKji)4X@YVPz^@uWK?Q-GhxmuPO7K~2&hu^*tEu24se>O}= z(*~?x+n+wJi~FV7Co$O9~`SHrJHngicm z3B30Uzhok9CvoebT9J|0yN1gZ+1c*h3aTY*b9_uLTXgQmeOLOGh}Tsvs2kGA%Z=iX z^>AMkRA+O~tkecw$p0OOt5rEtUFdk!H}&?+qBtpJKu<64rueGaYg4hAhVu{l>Ys*; z7OiK8_Pvz;FZ%w+9mkbK1|G)91;GU2UMc!7g;YWgL1d7F;PoN!i0NI`ah7w~l zJEp)XXaTf+D|k?nvKyvA4l8{m38m`utJwYhhhH}=iN3Pu)oGXb*Sg zl6kgfpv&b2Eqv4moCqmqW|A?C-G{kcQ8ih{?{oHt93mNf+DpUMyU_r>r6=vt6nLjq zUz+F}RU5b#qGlD#PvSr$Rr{`X-X;Ks7k^c3|?MSMceFQHT7RKWEGrd&d#tI!az3*n~UFxAFyeYnOy2r!Wf4YFn#x z)^=Bg9^CPo&phiO3)0SA?@V6Iy%-B(%pdRmZ5GwM18v(ixxC#eOfgUjW{^2ttgubB zFMafBSIdx2q+`8TY2^`|PQ{(Bj;#G|4k^!qF`uvl&eN}ld5&I;Pz$L>O8*0pn&Ed& z)N5~?Yxo5ltqpS8!TEh$i=$n8*e5)We&^m0G^ZLG=HBoTpk8+y{O|gLP}&jDg3}2e z;J>4OM>T@K5)YE(20Tg6rA)}z#StV|_8UyHF3UO{&!mgBu&m+x86xZoh9@lNSn#N5 z0$d%ZeJo5v5mPs?nzX$nDSKE>qxAl-V3=Zx#i}X{DHNbtdlg0a!;d9bG`%}m=zE>YxA5RYWpm1OdPrxC7D;6vd;|mg zQ!86ktf;%sZ}$6Hd@Fj!z{JWi=Xq6F==Y*G@n`&S4MlD%MVG#Qd(^q3h}UiKe)^-D zhwMhH=Q+<|#UA|(pG$i2zMHhlpC8y8cz)Ec@x^xe>#AOX^xo1qpS*Jfit{DsjQS3X 
z+0^5Uv)Xzs`-_Xr_sM3iRVk3?{5%Vvg3lDhPB2H0_MhU#>NiW6xEihQP#JrE@a(;h z4vv|Rm4dFmGnf1t=B43$vQYF^xOn~2O(`ey*ZoY&KmAt}eG&-%N9_K0k6@~0I{16= z87Oe*Gy|McP3khTC-{&9db-yqmv%s{lajzv)0p;Md4F4N%u|OrCtbBWw>9}xblq#d zWB*C{Q8H~`9ubl7N_+L#*wV%-;cwFqFD0AY5PmoL7`xYh^6#zSZj-e?{G94-thwl4 z>T3o~xRZ-HCU4(A8F<>##NM(YmBzfcTTA)%4eI$v0-3=BDbC-iL4-F}1za`9>H1jH zwy|mE?Jm8;-fqn|QSTANvbq3A`}UjQhtN-dso0ig(aPCA*6UXfvK@WgAAPwQb%J_- zn^kt0_)Td4!-OHYlxXB>*hkIwmqyu!Ulld~#&elJXdk_ zD1(NU(j8p+*o$TnPTVCS(9u%x2gV-sFN5%Ya5HyeS8gAJ%9Rs|8@U{cR^#*dYhn&i zJ{v;c4YBUKCg&qt{K`OjQhs0ds1tXTh|ZU)w}Ty2c*$;FSpG%A29UijQ)q4`UtP|m z&ezPHc(5;Wbp7J0Ra--OPD4=X|HIjvh9$kW@uGi55m6B*R5V1KCqNvt0!LIV98=Q* z=c$%+HUlb7SU7`O;Y4ayR%Y1<&I6UE%~qObb8qviwOVT(TW6p5+Gp>5oe$?57kvD2 z-_QNr&u@?=R<{p+Nqf%DXgM;aGqFvd_a|$!&e$2{7&(uznSx3o4Z{lX1NR&c>Fn_O z+^75ZKW2j;1xjNmO={()O)jRBh6BhreX=l&_s;k5u5m-MV+Z@~nUn52`U2nmer1@{ zUS?*Z#$1bKZhE_5g5F{e>SEiqu*t|05DlHh>C|9nrE;>c?3WlkTu1~#i*{uyU3Ka=-_^Q-Ah52J2RZO=aGWJ61PzKJ(_|Qiox4a^S9s zM<9>^SGsEc`ZCLbgYe*3`61PnWR%iLZ3z5X%O?yyq{SAFkg3D0D}|aA)HYHbK;ojv zxHVc}Iew7~0BAK(@wlUSW2uBMx!6>2GS^Qf1?g zWxys=F0F&c#csueq>O$)7)XQ1jDnKzP&7dQh41@4L=T^gResK}{VJ1&bXTbWNqbFW zO-Bmq!D@Lp6e%iI2MO>(T5?;#7tR(z!E;#o0(a0&M}v*m&JqMMC;TBN^G5LsHuApT zag+_Pd2;(>XHFrtl;^_fPwFAoqwcJhJNoh)wvJ$@T8!i> zw`!BJV90RdFrCK0+5qXaMOvq+e|2281`uP@yDEV>eCcl)raj8%9vBF{v)|H-Ap!-h z|7UHv4pSxPj|%0!x2L{e>eG8#n9MK#KA^SPF7oV;U_JAGwj{7Ae__+ej->|WykW|Z zp&g(6M++k0ov(i=+E1*_YTL%(K5pqJHV8jS4mMsr+kZ<FPTem;h&NN*Y+jn>4&))n$+Ml{K|K3_uJh;s4usiZyYl;zGxND1A_uZJi~FianUXB?_Ddl(mJ*jmJE_I6H)3( z0J~xZ;I`wOgG~>^iJ)m}R-EZA5IbYS(U#QF{EFq7fVSNsYcKwQP8tdu)$kJ2u$^Px zGH0O;9*tFi!qT4HWZ)ob!utkp;GZ|?wctbQe(OC9>Tpp+E29okvXeC(&tUrux{UV? 
zgBDWDMwh)&#aRGBt; z-5UZo1NSJ z{ZS6uC7&MOS1g1K%+HZg zbCeygPHqPSq|!w9KqPXQs|_CoSzd|K4)KPM@mk{8GxvvPNI2tj@c?viH3>JmD^s`* zCU$ZqA5!(#lRg__x*dMpU2U;7jKIBwy4VRs z?NLwf;)}?~`NsTK^8qiNJ3+}Y36O5Ty=H;Bh_12nvT%<;kH!VI+Aa&$%b1r2BNbNE ztzedU&!1lP(NR54#|_g?ud{+x`gJv9i~raJvfDB_Mf~KIS-MX!SHb7(rqGzgy zY53L-D3s}7MG_%aH`*X1f6muc2{m>!Es!xlQUC)PHp>dpQ7qJgKq&N13OaMVhdvEbuBoOEv=eujq85SEUF+YXDQ89W%>@i!R{ zDTPg_5y&zjLKLbQ)wTfopVzh4&hH+;?h&Q$!Z>(st`hJ!DVC)zdbZNUfQVxCabOHeL%}t3uz+KLQc-DJ4sNy zmp;NwWyaQZyG|QIlmg|Im(jhO8ZkerrrlOk&Ih=(dhtbQd4-rRHsmtOqHYBX?b=E5 zULZc6CUw<1)!mH_G#+8jgo3)x*XP`nx_)(!PWXqI4A(<6YrM)(5CJ?D+&WWsQSy;o zcN{zMQ4!P(bQa55c|+sE^HGD9%JDu2exj8SrQoAjzwi|$%zMy)Zy%atuXFM>i9YbX zWAR^3NR9+WIU8TJKxKEFdfp$i++AcwMfJBLtJxay^6H+8u>N8z&$Z^&H%8mS4==Z} z{`N6Qxc~0_g}=V@l5ShA?~5LbJGkXfx>a2uk$S7=(%1EdcVpjG{IPOwW%sSZM`J&1 zE<4*^otU>tIB`DV9+5?Dg_o~odHQY9EnR$k&K2}9gBkD}xxY?w3)8@_hznh6&rXu` zq(Lk4wk?l(-DXn?F1|K>i3topWd6+-(Khwr)@+ua&agM_77xAcgP``tQb-057}L9j9%-{fuSPb&9Neb^ZIAuIjAyO96B760!a z03u#4ga*nPic~k@Ccf1p_7P_HC~635%0$pUjMxpRoAOyhC{3k`93_uMqu_&Cigw8T z%9!0kFc6$_ry!`p6I|ciVUt+N=f?pwt$>SHlJS2#kD11VogC6C$}541>l22QS~EtJ z_t?LyQ(b>{&0O9ud{4859`uB*(*WvE*^*;&!iKHE(}Tt3ElU_4I#XWrFacx|&;`vLRArRFu0bw7SbK1h!)iP`;S)iCc}V4XPP z_=~kWu11$7gdDHr)iCPv918s;3_H@qca+?!&RhBl?zL#m2?-Ze-aQI?z<_-lM!L3PA3Rkq* z2NO-a6~IyUOSKgP&@=Wm6$=gaQ8Q2ioCGxmasjlFEzkzfVuhiz?yrue!Yc!sP%elc z0G3*~fxn%a+rdiC(9wm$ORROB;uEA64muf1TIXWx?$Pyz9)_xD=8~2sB6=N$6}sRy zhYD3ULTEq**s?(x3|@yc9WHL3u9}*p?x;!_fk&k#7NvPA7A8MHJ)e^kSvzC)_$U1b zOsF)mE1{AW5)e8NJO(iP^fXI?dn?m-84waP}3IeySf5q`Cd}$G#XI zcOfJp(akVd%h-^|1~n^{>f_&EK4!i{Jq)_3=uX;sea0|)AOk39=}%d!Uwy&+x44n% z(C@kr5WwMN0ak|%R7BGz!uMWM%>)z4XeYW8WwthA9T{J1lS3E|cCxE*0~Sj1p&B5{ zs;XEO9fk>6VZO5_M&?C2Sr@Tpv?_mIIvQ1g<>&5?RRv2=Z0r$M5ARGr4Cu(n;RzA7u_=aJVZR;jL9cm+MF)aUg^R`EyP!s%rCF(5{ERA1v)Kvpk?gs zFj^Ak!sBE+o6qs&-{XB&xs&)h8^7+N$F@E?J$~#V`BJK;PgDg4XyuJ4b9+tce7qD< z*1TB648SDGcGaW=)y5{C=`r!iKIOr=Q6W&$&uzUZze0sGxu%gacS#3y=V0>(xQ{`# zp+;z~wj^gXYS)!#><2hkm7yO~R=;75c<+AU`)#+-Bb?JT4HVz9r!a3+hIvU7TEfbo 
zbsIO=a5AgJ^z;;Fxrm*jY9Naz`l>Lkof26?5UuMld!#P?*-!4FvbJM82_zY!>+{C# z=WrwN7IG0f8^CH=yrZpEC5QUTy^fy(6{EptIr&9vkW`XGm3bDT$TLdmixh*(oP$uD zuyyPa1O_21+v!vc1;P0Om@c$ezK}luq!a-GSNOjYh18zdYc59c)$rtarGXQCFA7>1 zPd$aFkqJvOCK~hF01atFC!(T_0jYvO69lKtCL0_1c&RWX&z>jHt)e}0%6JmaGvDWl zMcfW%Vb-c4Kb+{*{C^&#shD*9vJNtGyfszDyF1`G zHJj>J8t8LS&yEcS*NXER+!na(pi)0g#Id@cVNRX4CU zS_E2dS1YVB!y~}tRe7e<02c#~mo9|sGP6urQ^cYCe5XL(|$P4y-^=V^F;<%!Z8lHT!{-7;i{uWro3x&KL%f@jI_ zi#2B^SyNGWcS^jr4n`fm)I0f%E{x9!3B?{3yn6U`%S>6{QSV8+-pP!E?JeRb+fJ^l zJOAVNOZ%hl9Zg<%oN(4!Z8f=ZCduiPLh@yM5&b{fZ|tQ40%^}*x>@F*1cE>1LD8?G z{ND}7ki#XxdY2X{7roy$e*D&dK6=ZJryZ{lZv*B@H-n#jsX)ZmhLGe0GzZWsFBKzH zDV>tG9}!xCxx~l z*EwOhT4yHh#g{lmdQDv+ErdFXxQx#7RL(NtR+v*MRln-@YmTQ!Fzx2&mvFR+2RK2v zQU{KIC94U=UGgwwzlZZP4IT#DF)+!=QtJeW8tFE~i)^e(Y4s+VlgT**Y4J4TN26{d z>eeCOV?qXdf(8Q2U_O96RoiU?g*`m1iyNkbesDo zn)DxKN)-SK5v)(dfwSh>J|Grb;MZxNj`1$m8!l$i1Hh@p2)a#_N4jGUo=)H%QtQWA z?*_cuYa&kZdwHd>jf7pare~O#hd}5Y)_t#b2zVS}A5FLORU*q$b$vp0+k6OsC}y9r z*ibY1l#xqj&)ovE66gE13iOg}c1@Y#Q}WM_2j7m?7;=j7S*eMhd%D(OlV-P`!p&Kf zFc15`zZ&&Zwq9n}@nl!C>Yg>+!(TIn-cFmsm#_Z4KhDn*Om@5cPU<;Bd+=(H zrGhLmzHG|}veY4pb63(fgDwT5C;NTKd4l;|giSQ7`*^~W-jk#;g}Z(GStIj?(6{e? 
zmo#DW?FzdV3~er3H%E>=G}(giOqh6`tr>DB=72-sX4NbAUrR12XHL;Jn&CI=H~xVN ztpZc)eiUe9gWLju7K@pNmkQ4)dex2oDIvP37*c&frzv$dZRuh(gPK*vPC=|R)110Qeeq^hnOGX8BLRYu{fDdgv za86II%NRcCk;d%g9q&v}8#nZBSM%S5B|oE#=C!0jX-EZEO${GVb7wU(ac)lcBwt*i zYMYhB*VF-_&B3ig5Khl&Q}B5cpcI4H27T|JycLYQ?FC#6I^q_oby;zT2<`mAXcu4B z){dUbun-Q3Zg2P8{G@p`ET9^XUp)2U)ckZByg$q!Vef^ip$O#K&B3n1 zSV^ymoz>H#;LmAUS|9?S3kW9=1)GW-9Gxi%TB_cXqKc+z_hyoKJfwnF)?5S;A=p(i z8)$kU-gL;^lG3&mvbud)R>Ln=BH>FgV(8}A&+?fg;qf7h?wYJXnEmv8v7yuSP} zUfP;=jM^N-YZ9!w#T?SbW_~PflTb{%W*|tV+q_c75UN|foUUMwhs$pX+i5tzL||e0 zGlf9n=X&as2s%Hp;U1w)Gr$C#OO#mV{M6;B{1Qu#Tjr zOdX8Sbx4LDE=w#{ru8DFWXu;pBGCeE_F>*Y}}Yu!&)T7Nj7rx;-hrkaO7Mf4|k@K)j=k zlEV_T4ETVLzDh|YV*Qi9FRF@ka+Cz4F4R(?@^U{_z@`nW zsMy_Q%ZAeDeE&L9n$vXbz>(Ld!e#J%;dhSPYNP$W%w<1(yV>T$+`9dt%TpZ5*biSJ zsb(PY474I`l41=pWaRZ|9MQEwyh7pB0uCmV5 zBtF0}mo_Yu1f1RR+p)um(nX7Z-;dt>*5g9w-p$7$tPGmH5}^{aWcI|R!9A|WQ0-5d zpS5XFyYMF)EJC@Bh+$i*;-JOn$IM6WuKp7$hR%M%nMSJT{hM+(t2!KpRVa!Lse~p5 z!S%_D+~Pq%6-k0I<>A{Y2__NxEcGyU8hx^h`4Wp!<{N0dK7k z6J&XGS?@LjL{)XA^({k|G~{3z{V4F`NCS3Zf_n(zGj<37EdY`1|D1b<4t);=xhCb@ zd~_Cez9S+|!AeXIhnvL0oZKp%xcZc8F8b0TILMXodqX@ZU`VHUNAg?eHmR;d^1On1-0V1s0Zvl16;?EzD~fu`u)H265W@9Z=VXK0jH$Q5$9m!O~O3}h_rZvpd>>z z4nx+sXi_gRb>QI+50aR4B@y+ZL<>fMM|y?56nuY4M+`7!d}>iz#BZ2T|3`9Dvpn|#^%qL$^J zKxTL$c}0qs2oGaOt+D07y+ZwA)k?#bmrS}Gkto;EiERTp$K0##SYO>^xck~~Q&CVm zoof00W9IbxfGtsW`g8RT|XO=J^LX7I?{0){r!*7m}Zv@Rn0a5BP)Dq`3`2`ESo z9!^Hx=NuKl!)1MQiSgF;qS9z?}TqRwjtDm zkDV+-gkVD%qdxP_sOg9p`?5&`+t7WQFv}{x%?AU|>oHLuK9@`%E*V!b?m0QZj<1Uw ziANOcWyCUlxegv?r|9iAiPiSA(^5qiY-1JOAQgH4oC^Q#WhqBf9i-NYDM>JlutZ&Z3)va09lriv#V zZQHICefz1D|Ln0o`^6O-kIab^>)t8QZ>wA0Rz4Z?*4e(?dF(whZo2WYUt-T+r@Q}) z@Bix#n{@4c@%-t|kH%$V9E2Q=9_8$nFRsA5&93J18msDsuxAUpv#9f+s7l%nI3wI}>#)YgUi`}(N06xY zXJyiRCqKl`NkC`SJDwm()9qw87(9#S^O7coQgjQ4g#s9A5I|wzz23xKWh#fS9N32i z;TM3Jka=#{yXYW29Yy9m=~79K(9m_Q!(?g*4>}@m8dQ2V?r4h*#~?Q|)(hh`P~s=FE@1rq;H%7fI8N+o|`?Uh`kM>r#E%sA)Lg zIwYk5TDnl8nf++b^oX5?=&P3W(C#gn&V#lfao-3w zu%CAIv0wU?nB7)gv#vq@bPHok)mIhW(e-}(?yar%kNY?0qtF`ldz4rlYGW13PE)DZ 
z#f)@S5ZsGF4@zi@hZbf^P16oTX1D-G3k(c_xNRy7h-J~j;F%)OGnyG@mFzK`;_8AMip;>*2Tqi(ctIp6i+J|OnlVI7)yx_8McQ`zP%=^p*5vrDM<7Z@ zT_L;@n$+vM3IfVQEy%f9Xii)lMzaBssj6HEGj6Z4YI8kIomKPpC2qvf?c;a*Jd_qA zNr>IvpjRab5J*l-3#>T&+>1E{aYnK9>@=B2&8d>aV&Gm}-D^%x(!#v7?$2$#p#|No z?a4h`2U1Fb)}f)*m{|s!)*7M(o*dl%?|fBI=YC)7$O2GDLgNe=t$0d^IwaEy^E0aB zWjt!2!L4d}APQ5y&w(cQa&v_jh|*raJ;Ut;2?pLnyh@j>^S)ODA=OWIj4up740GA3 zZO|A@X(w&Mo=H;xE=RfXfST5h8wxb;{M^hvo3llrsrttDbCcpXIi6F~SE5Ybw1?fj z6PBcMhaZ97_yCz~gPL zC9a8zcq>>|LvOgYUKp3_wX>nTv_ZkDhDH9NstWQVVW-uJ$3$76h*>S$J6vqtvjhsK zvC{IrgB)(Y!n!Uov-kK1BXz1H3%TAC)QJ!L}m|}ezq!C5;4>VJ)`ltMgnH) zv}dRAVW(Ll(oeEDo)BXpjQ4Xk1!#ImE{bVEX0sM;Kd(Eil&Mk%M6<1*Rb7QWzqvNM zJuOwpwW6i%J1mJ7VA~HAT}Nm zDAifsraY_jKQ(PyP#eI_H9?)zb2ht@TDuYJM8yxm6K8FKA(x?Cea$duo|$IF5+KwD zkY?tzA?rXVeHAGMo?K33|0F@ z4Zlnz0K6tpHMrN{;y`w%!N2cEPcyH0Q0*Vz2LiY4L97pb=R$oSm3ScwXR&+1h4>Yz z!WW>J=x#U_=?34X>ky3jKGSy>2D{Y#7_~itHl>II_?1mfi0^i|vCW_EY|W%6IDHLC zt3S2j{qP3xjj*6S$p_6HCVw6@uHUySp(aNdglk5(L5nTPaID81?B4ar1ldBq4RdbO-uXN zeeBz+5nb$|Mz3;kU+jC5J5VG8?J8PU*;OJiRPg&iEyZ_L$Q7Tk(M5MrXZWYoIVBt6 zj9ybXRE!$!>QBfJtUs7r1+PfVwt#Lreu|h(fO-pT*zqgLYymSSnuP8e{;XFvSgDaF zx=q+rxylMco#i>}QTTc#J?Os^wo81{lmZN_j}II)%?)EKlOT{P=*ADY63AkFGLmK z(t|>WW&n1mmLv@g_O_m#Yn-gN(5hB@V2ZFAKIHqtJA^&0$gI>xM>)UmI9YflI6wV* z_P1)rOG>iM+2=LbN90Y7X}t~w9D@YqOnys~x8optioJNwsxmQe-UsHb@j2G_%l@K= zgA#xI;b5fCg@~T0og`MCIY}0NvhwHkjcN%S#YsV3hP6na(|RON zur`|NyY??#1tL=7Ir-}AR1#KDrdnFK=_s*KoC?7j{%n3H#a;s$ynL(HXrWOnC^8YG zSsD23PihD_7w7{!XS^ETYeB+ijX;a=JR#B1sq2bDx};m7WbZ`~B)#zoqO>lhMJB{} z(_m%;@3fAkeT)RPo-t{a2Z z5mT)=QkuLF*Gid@PC?rYR-+Ac$kXUN4>koPK&%i=#o>u&x?%*Qhc2H56_xU=#B7mT zAW9qCxth6f3b?$4*hdyWh1?}Y|8_JN- ztd!2$w`Ikg*3H&1 z8YQ!r*;K$!mD?158}B_>%wDe|!*I_Mkut~vlTC%K7z5=PrUO7KqhkD>L~lpvZ8}BD z?>imw+`H-7oD*e!-z^Lp@=`I2&A~M3nM0stBm!JK3NpYlGQltmOlX7DfZ@AnA?`{W z9Jv#3W#mY&WeM~&RaAwnb0}*ADkc?D=pLHbyMVB280n&^#d%)_m3T$f#j85l44j=2 zy=T)@S2I#+_t|gdKpun(HXYGcDdvNB(2HqaT;E^#{O$WI<9i)@=O&C_gxog-ZH&m`3z6_RwO0?57~uo<0 z!8o+3hqu#hGIv*(b@)| 
z0K;-hPgwG$gqD)W8Xy&@2C#v=Tu|qosVszcf>7sPbaul)7oF^giekE4JA19P^$R5m z{&7#EZwS-j_Xqp0TOgr$RSJuhWz>2}n;ea@qO{vEuNg8#-SCj`!O;SY>1@4!^q#?0 zTS?z-{(c4690gwQ)td3ivD|PH2{^lhaJX}twOvi=ljw%R3$gGwJb{)F4d(FVW<}=k zRm%)i%ewV1So^oWpj-^xb`nqWI;LYvsjk&9qRxC)(}DfM_ubxDj|hV!_W>gmzZeNR zDa@8~QM04W@0qw^f-b~Z3|3r1U>PrG3eViPA}KY%xHVC?8{Hf{)J}bm$v-=gyTR9G zs|GanS<6DZnXL}ov2+hlbknMEtX!P|49a~3OsQ}PhTtlF=7eTY zVrp4yTPHGTpdWYQasBH7#sl>?7ph&?#RUDft@y8u*h9BGKYR-vKk)AFuC7yIcZB=& z4g}xW^!}j3cTcDD=4UIuU9cY=jf!l%ePimS^)pPG-L98u8p~4$;H0D@=23^10@B}9 zEd(c($cmc# zg%1Q`P3GXyfmg1+&e74s`Sk9cR3{htK!sWV>O-$2LOl|#Z%qSiv=$vGeoW;eiU>uw zqdyFt_^z`nRz(ej!(lVDYt}jd2-PZ9c$`L4i$Ch+4R&Mom0#taLKiOjAB+SMRJM6LzGhBSmEzpN~V zhvF2rqr6&o4UZO(LPuUm;zbEr_^#YL*83g|&`9Kv_~$GCW4^+p9L+VXh#eQi?QaF( z-mmL5GeV;58rD`l4BSW8)=I6tA)Isj!*6@Yw$rx#e>%!k(8rF{uVqF*X|Iduo?I+j z9(w!P@7*2I?C{p)=fZ%OPoi&p3o-6!i1^`C5c8@ZJH)?|vupF4`!_37o611@M1Fmk z&MU~pBlu6Zk~AsWD9fZrDD8C3(&p zx8G?AV&y1N%$P#9K-}A zF*J4V$m$#@g&87P2q~+1R9*v&^2(sX?MY<@2u0aJLkSjp$xd$#LJL{4nlFYXPL&H| zxi%Q~(yc$xn!2T0Vz=Hlet|gN(AGG!>=-#3K!XwurDW{@d3$oLU(<+;d zsma~qP1-p9s@yD{woOBJ30!n;Stv)CW|h$Xo5ZikcYcj^*hJIEsEFr%`vjeSVIQ_c z_?uqrPjzu~JbV1MWk}(qKy;KbChR)sY_xtZGaxzq^;Kr2`{6aLrQmCu-N}B%_rp#7 zqWL$+kCN*U4IQ!$=$pwizvVWM?+sQ1{f26N3Bpc+*#PHCiEF_flbLAahi33Zp|B2j zuIqK3BCGX>7`?zXZY@^Rl?1W8m?ShchK^_VMsw9waAm`dI&do-4bSd)J*ti|*P{$_ zfHZ!&*v*afVTR8nMzL`tUMP~!AYaL?8w(M#UMis6o1@G~cBQ}tjHsqOn`LM@pATE> zso_eBY76kUYO&FS32sD{DXVWJ{*U(~qDPc`|5v4s2O5)XoxJrZd&w`*CArtHxCWm} ztNCTNS1qxV52G4XTQ#nFuqt|C>l((ACZ?Bk=;WGONI5kIO#0FDOd9oAiC@$)$q zWe*HI31L;Wr!8Y&MiB+ zuqx{uxh@G~$0_T<=#}v$XD8C_MtmNw{f=`kbxx0yE*WvRzf3E+n0tS+h>(Y;u4EYj zy&0$c-Ug-}vFf{Q4NWikSvIx!U?6{7`L^4LE17K5gVb#m?`Vqf*N~r33u@#vr8Xzq z`^Nx#)1EFLimD3cj~!Nj)%4!}6*DJ-_S9ZCSYz7e#HFSndpbsa!xmqz<9#DyH)I!y zv+NwuvAsQ?6|9{TL@5?s9J@FqG&!Wf7d9YKX09)R(O^3>hUX0_Qq95iq-U~b6Lbat z@v4R)@oqo(Dk(}mwlk?aI%`k46{)xOsuQwCt+_#F+vhqK+t?}F zjylu5N561G?u)}9c#G(Zq#cUGU*W&%_piKWk8oi0!uW%znoiQ7;D**-^g^zU??X5U 
zC|52j7qCjJkTk>eGvUNBA5=JlL^VZ>f|O_?ODC#bKE&*tn;Hf4Wp1k^TQ)G&t}A;2ek2Ukl`#>F(hQt;|I9Y_=VKu^~V{x zD)7#xg}uIBD5k+0aTXMDcGc0`-r=mf6XjXQ>w)C|UkcIxp1uF;2X({(9$?i%r(H=T z1G5Mai0X?pGh~}mIx*f3Gc%+wh^2&R%?1B;0nk4J&YgUroLt6)iJ^rWkp`lT_;m&} z4GkkSvYwIpRYQP)xmOobb| z#|La zURS%>=i7JD&}*9qYl80j#e=;}X`?_Am#+uakR00X*<#X=of}p{sBv};pK%spdH}ZQ znM^fK6)ZaR{73A}HHYYtqn6Y}Z>NH-7Va7r5fA)h^+-gl(E_+q5=v`Lc0fN8Wdzg{a*-%-3XRj3h+l zbV%#Zi+`$Rn9OWx816man63;NO$z+)xYQlMeTf>V=+$=@MzUJ&fVW#8jMU&%+&2F> z*_{<#eU}(==(Zb&Ck%cdy6sLc9fdv+jAI7M_>hMfp-NUD%6S2<)RF6>!fV{7Pz$e3 zWm-Z587WF)<%QEDr+N;{hMXj*yVInP48`eO5GL3{myG=Iw7+fXw_Eo6T(yeF0^1JA zU3=S{ext4~6AE33W-s6zrJ+gu8v=}7w+ckIiq!7)HlPv;N= zL|DOMlmE^C{hFiW{uBI$#~vs{gA&-^ts`!Q%suC zy0R(VhWEeqML!n#$H{Pid4!JT`(vqThx#pkFz*`g8x_e14ow|=X+_L_RdxB@Rdu!F zDi?0-o2tKdF6`cEmxHxi3y!4i(Mq8Pr~9rET|3XA_&sbIPHy7A-9YH58+#jZMEb|> z-bWX7E16-_Pj}&>lJkqNJHFG9t+nEHnRfla5gz|I9#f;X@A<)m4v_b0Z8DG3@#TwW z$njOvn2ZpHAx@CH)8LfWk1gF@hwC0;esR`G=HPuKwjYf&I8SzyCq5zX7%3qgqW!-9 ziKe%csrO#eec*3WBl;sH(fb04w(7|r>cgSf(ZnQI8eOec#rq$NKv)v+u}MYJu^`Th zA@w))hl!6mV4LAks6~{vAkeBbBHhnTBycQS2_>nlvo?sT;heF)Tgi)GN-hT- z(3Y9RpF$zN({`l)CtVo2reWP1`cRDU`QQ5@-{kDxRuK93y2&)nKL73S+1iyM^=DVs zZg6Z4?XoTK%D5s6@-vLuX}S>7@$=5BM{BXEuf8${7aI!yTHmLBY4EDn7IMV+=kdem z3eq$n4<96;mvf8`NS4xc160FTotW1A;qzx2svotp(;)HDiT~Cw0?M`pM%iIpH2VdR znf3>Gj@VrKS<+Q2E<2tRV{_XAf~a; zlcnH9?CmOH`|w=w;HY446wxrn5Th8{!ok z#$=%KBlK+lHeiFc(}8i*`(bC7TI_$Fzu$709yrwBADYV{f~B5yP7EjlT0)zgx)@jb zb>|uPb>QWAt>iNrz27&eCP|1pB0fHdz3zT9>aj_oAE|NjmAiLLO<%_&-pk{Q-;dx% zCX%AG5lwGOS13MAha>c>YH+Lzh2b2=Miv6t8An0M_qm8$QNa^31qD50)t!OvbI zHrMoB=g>g<^oQYxG8Cqb9Rt}9suQPAw(0Sf%e}cV0;?zv-FztX??%s?qMb7z|1m~z z6kxAC1v}o7hO}q3b&z4rHx@za-fDDhG9yOK-s741Idc1j1+#9@7c-E^g@lLY!aqTs z4Sdh3iv~$J*sT5BU#ohoT!7_C!WhhASpfi#&buFqU9RrpEvSl_2(}tX2pxrkL8>cW zwh$Uy!Ko`TEgk_wiv(rnHEM@Y@klF3CBVeFrY*Iafee^>+8P9LW?vFDx*R5>nn-p- zQ%DAtjL}a^SZP0pE2#-yc@}Em*2xAyClxDrXMf?f9dH=5vIHDvh6;|ylMU>KylYh} zgnkeb7EsdLqUp2p2epwY4WRgPx?eELZo**NkrdDJxW=&%c@Z>(+zCm<>KNc_i1mYr 
zH~Ok<%=zc6A4`DI@i(=5fI}7%+nB%b{U@)-!%b(^U!?<-!~a&7sDtWde-qJL@ieQy z@pe$M2mlf5oddLRu`%>CG^#B@(Q)bS04C%0BS_@Q=1pra%g&eR={s)z*?anO?!)@v zxD!FeUtRP+H-tsZ&)*n7X+PoH-+wcqOZNP#16Y=?7vJof1mgtn!i#NLJK5Qb^FGPm zt;8PN`!NFQRKm1jY3thaFxg?bywQC5&qPZ95eyBSO2B|&{(g&6C2)IRxm|HlJiuPV z3blQ{4rC@gSPZ-8Uq`2nHcZyo>ClRO@Y{p7Zre$hhpFak0a3!${6)Vn)AczhHTkkU zmcqQEH?QX*TMv% zLKN)R>43GeVMEHRNBkU7kP;ZQPY0@|gqmXX>v;-#3jhrv`_en;c%=cvk_JOXpW&q@ zTb0d-wWPCtcpBQPuoM!FT!?D=7vF-__oG$);FXy6*DX=2b~!qyBAIkD4p9V2KB+aI zrg<)~Bj&_4yGxCG4qn?f(gxUeMvSG69pH3}IyG=rVV`6Nj=aJBS(vgX`k`#<($x)) zrvu!B)*A6X_5bbj{Qu(XJ%gHD+pgg>LJ|Uo5FiP~00HS)w92@0n+wZ{By_>qjPY{mja7oaa&2 zT2tb^5VP=h>?a3>CDXH#DM(a#V=<;#ssIUgJPf0kh@}+@s#Rg9!pty<8G=ea8O3(Z z$#odqlBAfopS}@q_ej!*U=VZ_{af?&E^25uwY}H%^5L|U+{uCGSZ#mLR(xB#)^MrF z+f^D`_kE7Cl#{M7YfW}>idBSQL19HR+xXJ{&g%Yr2wRSw1$Ph+6Ez`eAd)J;L_s~? z&&t7|Aa(%tIwT3o)db`~G=#@R3Ks^htKqv!8%&*z;2OdyYVpI?)ff>=Q!QOxrfxXx zK0HoQJ9<1s1L>n!at6Un)B*q|NM3p;3KGnl7;tKJstMtxH{7p$A&*+_UDj#2MFu~&>?1EuF(LvQ3bvEw+UU@gyHq6_$DnlGn*S$o`KKyu} zr5#fc?0x%Du@4~QX^D1xvSf_iTaRZQql0`L7mIg^Ada*7N<&d1*{w7!^i#zQA1Xxq zt|}UVXC+{(|J9HEa#--2|4XkLnz#;WXq8zGICFKiIAP=BS5YRisq)ygcwnv4 zYv8*1!p`+?KKopljwTmxDV(!}K(>oS*d|Mt?vjXFSp?YulUL_B)n>$J+{(G#YNV{p zMBkU`zR}OsDU9)mKgNkG1_3G$%X!(8o2zJXP}`b{liA=#+CwyINT?Ne&HGao69Z64 zp$i!K=thiJD7DmZQ!DtJJ$cESf{$mL-ZBGA^?GmTa1rPnX->^kY5Wv;lIN(4*UUy< zj?fhb@G5xb5PV9JwV>4&4+ZP-x|0Zd3SaRZKfyh)3I_%EZ zyR@!d`4bz)?d8ihOfb{2eJ_I5as)c@w1Q)!86EmaCd!cVq1wqVe2`Ewtdr<_)|?+% zm~eydASMZa{tUuB&U4jq)-Ea>AlE5F(Z*N;sx2fI3N7-^Hl&>lV8k2g;u^VWdP;rE zY1I~P7*Y&6$ozNwe%Z{=R-iA`xkI&%ezu0FMw(9OlN7BcSIvlM-vE)btA)201Hl47 z`C7G^iH-6MjVSb$)+9^MGMjJakLrxJDBnyxedURHV#};H`NiNm_2oZ=mnYD158wRr z&M4~b@h*kjfslk@Tm9wqXDZ>=0arbE9)`D2M7m90*(B9-Hde>ENwZ~Fd9ztcXkFhF zmU(M!>S}S*g?8PaM)`oZvARB-M{27f*h+^=YFzCYH~+i|^cGu(YM`GQ#zew}V~?2% zi))SpIDf%+wXz0Qmd3Jz-rW${52sz`__=u&ep4(R?B+e1B&8s=X?QDC=+K_u3)B-l zTBRB#1l4g0^mq3QyBLFJqT2)-~wfwSY!U}OqzHPP599=Y%w0RDvkX9e0T=C5TUj<}gM8OLbe~}0Sc%Q&$)dn#VQXN)XxV-bLbVLb^+1hWC 
zkp6O~*`_tR1pb7KO?MRzB<7^)8G@%z&^Q)u9v@?_nE7;vrA zM5Vyt!s`!z;!I;aZ9Ts^Z*@|+qh4;iFPPTtxkqz$rG4FS>iLzhl`GTEZG!QSai5JY zJ<)a>4*?W*MonE(YXX{>r9aQ;(xft{4UO@tM?CZpL=lpJiq_J^EaS zV(lZ^AD=?Oe${=X1(z#W_u(!dMKBng7zG$}TLCWbEs&q-Xr`CbM*qhlk>W7uPB|)0 zqmQC^z!y+15DYQ}hTv)x_q13bgc}Vca|Zzy<0BBrSOqq>I{+ilAAp>Q0;q!$00>tf z*uotHD4XxG3_}3UB|x|=I4J8*9Zw6!lQ5(LdKpAZkd4@0LoQd~XGZ6tXE(G29Ri!( z6qeVJO(Bny2WmWefLj1~VNq~=j4HcE*9YM(Een^ZN*$7!FAWkELwI~hJ?Il0%+Op& zoeZur|=qE=k=ipP^L+n?5tbVmJ? zSP|o()^tQG3SI6TGaHrBaH34AL9?T*;fl6_heZH6x=HigrXdQy)iW^Jz2}gPch9)5 zHE3HX#^Ys7*OT};PHd(AHy@Bl!MJdjo991GKEua2#;M}aXlr7wW!+_Wg9_*GUT&OY z6~wa+5s8_8E+0pVh{4g7rr!-3FFVJ+4z(kLpeFBv3+z7;@29Et_p3j>gqdLLODSK~u+<8BC8??AVnl4m zfdqQrd&3?viiXE)?AVZPX++==0j0%g%}9@V3J70%ugBU=(jCg8;yk; z3mFnviZ%40Z|***Q50`?-�Wtw9a_DqdUPJDCd^trn4x8p4rgCm6^=u#}Re;hV)B z&JE7at3_r*PF4JsX%zY9Z-Z-FghUxGnAA-NwxluA;C)ayn6ikD2&39vqrA`tzAC|> za08TBWefi|zCVwzTLSLq)<7Og{5ZKxug>IG|J>Pi;@u}huyv}_HBAyYw@Jsjh3H5_H2S30iQ#k`i)t*by-Thg^`fF||F7hE;_6gC?Qo7L-Cy z#uiFihcv|$Q~Mj=<<0w5B)DP@RBcfL&bCZP6KlG?7Dl9Pw-z|tU5s>r54E7fT(s<=@A;cF4nK3Gig7ARni5tHIa=rShKs3se%(nk*1ZrB=YLKjkd5q2alg319w-1h}Z zd#pf?;7q}M+~Z$|9Bid|&q~h2ph&qqDWp_w-HW$&GvMI_;v@A{TPNM?iZaGgtV@32 zb4sCx{_W+@UIf26s)TsN*4y#Oq;jXF8P=J1wFG#Yn%aH6+*-K5)qunbHzX&}eSi4t!>oS0 ziW(jY2a}SMHJ=+uLq~iCHqiTER2r-Xv;bh#3wFLYKNh?s+E%SQiAddlsi@gb@w(>i zbMPuL!3DclcPC$aWG-f;8^0@bVAb3-Bx92*?dPMROK~R+F3`-|c!ls>lw6*Xh=su5 zxQB;j#{S)(TuvZU>Ck6$P{U~)b5{4Ux8Ndc@XT03$#c~)c6yK4#$S~f3}I_|BDPTb z6jT)e?q^w}Z`qYZi(cV*B4M$(itvh4o=4CF=Lz&8Q)Pq;!+99t`$j~lY_P$IFTrm1 zPwOpK`lUgSqFYB+0`wtVZ|77me#&{xKa~l|^jAqG>pk?`o^$pU*+*H)-%st2s%I%a zm7AJ+lHXMk+k7;%s!gf$Z~M#7O8SA(AFhYg5=%cPa-L01J+2r3)zpx((b4|ofTHo} z!AIKPGUNYq;Z^K<-QJC;bC3UXNOH^A;O0_)+W|A$==LiK|B6BXt47(37a)-M5xO#y zS=5%7&i}(3CC{w(rY@d*fTEIYQRL&>c8puf-73U%FKNbrqjZiH?0<%q^%3g_ z59#RMfEwejeAW9Mzc1r1TB^b}>!XOFI%P7Xsi~|9loME+*2ibUX!5BLBu3tN67-S8 z*EL^Y>w&OrJ?jNsQo@rH7lXYC8(fz5m1OxUVU#xvPSMOg+36$Gy3pT_^gptEXUj;~ zvSMSX;;T=kj@lD5CVT#htekNmPt|{JSiKk<<`OkS!<9NNd#_%q^L-E(m;CogxyO&h 
z?jtnG)zGMNUCD}^*UW~e*|vIx!)CEl-Z^7V4d3=AUQKL9bkYPl@<;X?cNnCGy?DRO z%B=M)DO!^#NbSbVJA|deI;sZDL=gwUY(?F3g|Yq<+KuS z?nU0+H1E>v=3KH^s)AXW>*LTT2809Xn9N6Fk@Xox=HY07A?mB4VUSEOZ4D@j$Fx%> zpk<+GI7|l84{DHt*+QyAq=D3#{B*P>m5Qi7Yz#+d^@8MwTa;F%C>7n;!`OP4*>K;? z^V7uGny)?=Z_1D=KrnV)53M$z)I##G4P541jVvK{P4O`^kAqZXw^Rl60P)f#; zX;2|xjyF-u1+UVT^#8qI9WJqW5BXwRh09oj+hH)yIj);RKJWbi{IH0FhzD3OQL2r! z7wEPfmqhXa(F-G-?&j&0a3j=8<~i-9AHP{nO3#j+yW{(J(6tw%39TRQM)w?Kdf1Lx zYB;}tl6Yx<|Nb!jm4c?cvq{_kWM@8}UjOAn+z8vV{fLi|_p<8Am5I^U>;1U-n)F?H zVHU(gMa|n#wXay4~@kGHRG!HtEYybp)}A<-9K&B1g(#c3+p9L-a&&ZAmbBo|FK_pm@m(5p#Q_ zDgoQtUPi%yFJva1+mPY$Uwj1?5`%yVkP#snh6Xqy>xs9Vf%bL=>f8z?*r!?-u#s*= zP@4J_9MM)dZ~Ky7kh{r05B)~^u4yppU$HYjoxHGr=ifnnq_?$T>};AL)DaD%>GjC`Y=^%=R-H z9(uICF^VdeVyL7A7tzyWtNj0LD^o80P?OiFAa(GNU#CI(liN2QLwx$*K@|%w59$_- z8?HvQ;pik4M%bk37T>IECB%c-6Aee^eNLVE-|JLd1WD=vWx{KnG7E5fgFbDQH*8Eu zShIlKLt6~pX9%)%N5k`kF*C#R=A?MPFQEH_AKiNe;)R03!xE5!y;OT(J1HfvPs~CJ z57UFFt?8)w{Z&soYkJaqFV@&9K+;1xRFnk~1X?9~?EYscBiI?v6__gNlDO6mbQw4r zGt|e>>T($mW?HebUS=h6DrI3B6kQFt1eFdn#*<*21dutSe^qDeaBzEgs#C5A)e0E0 zt2I%=EH_`Bz-inYnyPloqvD!fYPUWNsu>GgV+J&O@g*geI0%#*hTN2FKh37ph&bI_IYMlHBWB)9&oUj`iq{{vGF6yQYmiw27xmQJ~3;b}gch45t#4P?3^nT@Vp3oIytxAgzVv%7l#J zgDMKr6agxqFsjWfU!ncs7XiN3?a`7#s&4;l}ee3d>X&b6@da5nG8$k;gg*kNiDV&2a6Ek%-Ju#L zo6eXG{5O7+1(qc6roJeLT@Vku;}of6^qRvo!ndrizR_;>d6{_e+xqg62F+4V)_@t@`QX<$4MK5$KC}N@<9wk1 z!ku_rT!8dW&qGO{6%tq4Z@Hd^=d~Ukcb<4;axD4UoptSN^S-f4YW}(2Yll^2wdkk* z2uEJE02KUhe{*wudf6}j?5iVNF4x2L|7S+{Uk)J~w}rV?yf#&TMzY8DXK(i@$&Urgjn#wy8Hf53&E$d#=-6pv>~c$cwjG+o?g-`kOTF9kNUv<57De4J*A>C zM2Vmct`)R$YYNEV@CM;Ay_9@QGUk2QFqxG(}KEjiSYV#ci^> zd#MS!)Ay!xj3nV(tfsn0wYZw(=c7qm3z+`(W;n;0>}e;B@#$!@sg@D@{%dC>rh8}} zYfl*cn=_^3722pXNI|@e;c$8%TwcDJ%I#*i+2y|Vg6Mf11!W)^$}%NTt_^@mx08~S zLV!bl3BukIFcJjQ?Xzu2pRsTM7mq_GVidp6rkANR~QCZyglxi9~RU{AwQ zcz0~*_cDg;>|C_On&EHr`Iz_pVV%U}s@cAqkLLXZeDBz!(*s*8TUFK` z9t`VY^uKsl*W|o761g-^)_xT5?o-qMVEcR1hC)JZYFR4?`8$2HbffSju6&knF!4Y> zf!7EK$?}{^UkF|qs+Xg;LO4b2$r765qhye`^%x{h);Rp@P6d#_RG${fa;#=mRyW383AlsXTyS{ 
z`rQy;q%34pUYPk-4Q=usZsvclr=+Bdm(u>i3xZMsVCK_{PeBAHr`4B~EgEQmF2Bt= zb6C2{Kpaye52EwJN5uL(0ZG`ONd?Wbbu~GPy^SuG2e8&Q2hxhJbktf2C&Aof=^(Cb zo)YLcz8@2qmWJ@n`*S7+>5uYxAk7=BFBpA{Nm+bz*T~rPmL?EE*GHb422qgpLbuKi zquQ)SgX^!dM7FtBx275bMU4Ac?QEAnaM>yQ(Xl>r3JC&-*r7{HH>zazTIHWdvRUH2 zSB$J?cU_oEj5EV;X7$48c3G>*-RX14%;Xsdv+f1_m z7C-yO&nBJn?Rfr=F(T+cO6B0@-}p8oz=a?UJnr6bEu$K*2ddy#fX^0WC1f~i2o;pU z4Xh&kDF`yv(OmUw@np_d8GH!*gXn%!DWfoBVfd_0Hwi;P2?RuI>(?(S$AVlpT!A$g zA`Hwe%uZ$F<(#IGVmKq91D9sgh_gZnv92KSG6)34kFmPt&ccmK;I1gXEq8U7tIqM7 z1_7!8`!@ArTSbF)mds=w*(oEmH@AUvXUIB_<6h|8Q~N6AZ11;>`@Y#=-%?&m#FF@$vYQYaNfVdU7T3E>i!If(vii+<)+Sm25C za2Ut>T88a!e7`3#fBWMkRvpY&Sr}22$$Dj=B7R{;bhJLI&=Wy(7vY0Stwwv_rcOT- zT5QQ}s1scG>Y9(JOu44FL*vk^#Sz_$&?8X_k-x5Nzqdn19L0Y*q^%%daWbxp?78E; z>eXd)E*Uesb9P}~Ym3z-=}=y`TvhCgD0FO+$ffP$KIw;hR9(d98(x1)qd~q~>dB?O z3Df`3-Mp{igZb^^9m84%Py6Uuu;jj$>1 zvYqx-C#2^&xoRyTN)n9=V@(ZXL7oALwK%um$g0on4M- z!-GKI(i?Ruoun&L3k>kyrGzTr0p2FPBIE0sicbY|6X8MtUh~oxobXNP8Wi|pq1F@c z^#r1d=bA^jCn`u#1RCPDDsI16Nl(atv3nP~RMFNH^UnUJcxpAq*%U$_2wT!|YTCFY zw^Ic|I`NwKV$4C+p+Z&^T&q{+7S_m9vzkzZ3uCh)9FS-ChfUt7vH-D{+M4GY#&v{V zLBSr`Y4`c8`|09r`xe@MkZRv>9;Bc!o3&5Z+!p;l0-d6V?%eD!+ezsGJVB;lI|EWz z`=RDHzHj5bEDa%FsNJFo9!bej6GCRxYA0U`sPEcyYAR|teYiN~E&uu9D}F=iGfnD~ z?7)3dy@V%xLtn^bbfGTxqMq9elhC&C)quU=2m6ya<(b~3QEH^Y7wm}3E$UO#Rm0I0 zk1N*L#+jW__G(#O<*)ajo%(p|YTX+OB})J2kbw(l&i@i?=FeL+n@0x(uA2VdISajp ziw8=xyCSciU>+wVVPos^VPGYXhD%^t6H^UJ`5vKPX|0;5KKy80HjDPs*|6b}?xWKC z=UmR|-0b7HowYrp(N#3xV0!I;-=BY@`@bKg;9|TgNTT-kft9>1cN+9Ye(97BT7I|d z6vAV@w{&BUGXp~5`_(SwlS1R=D|+@M@5qyp7Q~kG7s3~_t#$OGDnUVP zd3SzB164^F;}K!%Q~_x~st~^3jVi+U5IGGRxdS`~FFQjlBV*v3{(;Xy)b*A%v3XDk zpB*HxQu)E?y8%6T>BFG}lO-gVSEH8!ODO^u1S`f*N4 zG5b|P+UuDPt!vY5TbvefL+mrOOGEnLliLS1J1NTFX|g+w61^T)#`N>MwDPnwZRM}C zCRWywIaV&`4)1Lao8(hWXHSuh?rYl9oDvDC)oYv*>sA9(f7!x4Z1{NTShLXNx_)QT zU(Dh8ml+dB=W+v&d0Ritu*&sI@Gd_wF;gkA>_7jq=s5Uz&0PP7;~qaOTbE6f>Iu(J z9Bi!e2!8c{;)@)Y0*ShbLgrOpA;3!Ip6sn2k+}83L?Do0I}|_It-xS(veOx12v80g z;h1`}p@g~((M1|reb!*x zh>|^LLih1*z(}El3nkJ{#QgK)N4Do5Q7u 
zU_r!GO*&NMfKQxPH8w5%I(`{SyyWJ$NSjt2T%qZepyjq=F$9=6%sI#k+F9KX^c$J* zG|cBzJ2c`4tM=5ec#;O)!q9Kq7XnI@_dDdQvURMb3;!Fx{+4v~7wQaL=qz`pSKLvi z`_5U>-C3)EmbzxS1+u3+Ho9Hv57YD6I>OjV;WS1V0^?_tGsTCioPJ6ksyWXEyB>1+ zx_)mA?Oek5DEhojOT6ycl(scIp-DD8^|-5NSL5nyF*zyz^VgOaQ0>Z;+79ne7hXsP zOpb&_U38#H?bu#-#i-6hICJV<-$5F&19$SWsZNu6=8ojEmt&R#Pye}dELoDQyY=z? z<+Z>#R}OypPHxug-P=`nDsh`VXk7tmqw*&=?p@fUcv$rQ#|r*Z;;9=)RN*eLJ5m3W zpZ~Z12HuWDcSct~+6i1oC4uABfxCq1ZSmRw4!o&Gg8=YS#gGD3U&05G6Dqprt->^!w6jzy2M|g1eZV{c`R^$T708=FcDuI zW?TfHIA6`ZIQ+r<>r7SkZWDPm3+{WM01XU0rKRfvw`r8>Vd{jg41~dAj};j9f-rp@ z)g)QK+zz0Bq@CRKeO6K5mqz9xP!ZN)5yuehO(tU>k+a!MaTMb8Pz|k=zaK_nxm|LQ z^iP8T-=p=ww3P!|19vB*O2?Ww`fq*o`jD3;0Q-{IZ4DGQ(Ab$Jh3({KpLxgh?A_Q` zV(#NA?4M zV2ho&8K}l(^@gr+L<+GbRn112t0$yTb{|!rC;Zbr(M|AfIl5N?&|D=yx+KN zHfucbxMU#8Dc~vAF9nyV&W#^AND}hH*A|+DCC%Qub(_!w|95=<#TqH7W#C9q8gww+ zTyO}J>R@V^dN3iKYhCd6LrZNFR|A}Jg_3K{UBd+#e2rWv2*$4->1++vewE1~-9^?7 zhCwJQf8lUQJPXEt*^|A=A~hpkJf7}=R6na{41o&qM?xMhruHo+eY<*%zt=@*J;b@{ zdsh-B_M)P4#ZF;xTB0CV@kx9_Mxpo5YoTZUJ+X{P#w<9FdthDdp- zq+bmJQuMkXhPso$yj}-4X$nma>6p3Zhfui8ro+)vU|tlOyvuv!&u6IHr4;wbY{}N=$CN2dAy#BvQ2%Sr5(tfA+u8nqPq+` zbq-T4C0m-OF|SnA<-7h zI#5dP>Q8y3otZ6b*n0gR)ohRZ7Y~HXU9{Laix|9gtn3yMoXf9A1A9}4fu>&bbl z-DnCO`LKgB*ERI1Bg#y*!capk~^^St3KQ1ZkT7D;_vvSA6#fu75bYzZv1M(0}b^}LIn*BmM9(~ zG3Yf{?ONb!0Pmn|_qh^;UM3-#;0bTaYRx2MSIP?zK6n}EV_u{*ekZqvF9p}Ds(gPJ zWlv~iNAN6o_WY6^X_*i%6mtt*sY%ggiQW`Y{0CvQ>}sBMoy8=g@RG$zRqv7i97BG3 z4$tD9yL%)lDY8;U+DET)s5)e3xY}V4TB9QGYS>4M+pep97Wl4XQ6SD`?{FDKl_o?) 
zyJOPNu{x*R9M+SZG3r z{dfSmw$HDps@z_jJ!CX+p~v;o&C{vapL+scT0Cxox>`+Ch!5=X72f+CXs9`9F&Fu( z%z@vlr8G!i$M)umxzQ;BXLTvKSr~L=dt~*@_7_A34-H zs%m;1H*_j~BAwn-@d(Gh@!=+G8!Un_ghmNf2@xFI4?Wy%`VtlMAk*vAY{4I|wN?;J z=c8sP$PfFpN(&{TJ|b7GCi6etB-W6^2|*T@MY+qg+G#d)S*Z1bF;EPGmtAZn>w-=8 zq=8|?tpca7JCr_Gu9NY@q?T{gw_1|;+ zpzOi=bA6}lrXijO%Y&azeRVr#`!Lpa5^b4sY!A~)J-(CGkUVMB@J-}?8z6TyFzvbq!$Y3)oKYU+1vi>GYZ zEu%fUs3c8W%91_s3eper)CO!QP)^%3z=m4qCx4T=6C3P?g8Gf7!H3e69`sWNxi8Fz zNd5>J;W|lJgv1Bx;BV>+u}Hih0{>#GkgS3?MB~F$@OV}HX=%--tVPh8Dxf25wG)Pr z@Rm6IcUi)kU4=TX;5rCk9#Y~Q^Rk9MfM zt&i%lhP%8Brpw-kH6l{&;F_4H8=2c2CepJM4FZonHTr-yS|lwfC`9UYPv)0CSsUt90Xiyf4~$ zAggaeT>{QwjjmCd*!#>!YiUd*+OdBohgoRt)$8NKwC)v(($Q#5(QT2Ch}P`260M`q zV36<_SPYsIRly`sP?HZj!o)yb(ON7mFO-n2aM9_c1wd+#0B43p<8`rQ2U>im>O(pO zeIJrUDMYUuP@6jERf0!)+%Bz~IWzy!DZEFiL5hEw3ZEdNY54u**uV60Pm3#bRRMJD z7o@mctfng@<#P%uD4qR#%4ft2B-RWH>8T){Runnz%~&{`XR$wR>dbH>`+Y(xcc3Qm z4IZa@&A~>*NHqHQ{QZ&CX^E5C)MpTdihPX{VabM`A`c-^ZhY@fn}=%zEBduk!#gkc z%Nq5UTNHktsc+wN<)kg3SwZub^2t|WWHdOdK6zXI(b33M__AelrG5G8MjY$zGo-Ca zsr^-Z_MX$8N`-IH{1@MkXtn6LHEnByTIZsh)_N8{X7cb?sN4unwRg@Zzi%*cXn8Ua9C3kV(sv^b|!E+Cp$)|Tv) zLp#X|0%4LO+IVJ)k|d8Y0#ik%H;ykzQiRnk`irNthG)qA zFBoGR=HpBB;R*DELb@mj7~udz8oGTTs{x<{hq#-!)5LwK8V`V!H}^$gX<$bCf)Izm zN68AAiagL1MyjLwcl`c3=4DAX+4LWD^noHEa~KS6)4MJLc#-c@7)byL_km?$xmFWlMs7^~m(_Cak2nsU6gEhyr zwq%SY{;^0Bprwn!s3QYU4%2SNeA(W=|A%M$kGc69!MA#HQ$O!ZeEPYp0rRJ9^cd)- zp)V1;SpTr>$i=WDWe+qk-c8Y0j$CMO65b9xa{Qp)+0#9Wm(!yl_sjxKlzF*71817I ze|*Z38E)?w+?e`nx-#|Fp>yRqdq%xu{ch>-|M-3)q3MX%^;NUe18<@py`~-s38pM- z&?uPYfVwS|jI2-L*4KeMdJ0?IRq5edf@S*@XYZGHuwO zA(ZtCg4T4oidAfcq_voOLThNcsLuEbQ&=dYoDm2|Bk>a(hEh7jQVO#UKx( zAte}es|v+YS19fLY;vxIn9gORtK01Y_d2NpZ#UTmY?eI9u9{8);dRmJ>7726mrv(R za&5(6buK8C^o_syVc-^bUH3&NB7K?xRe&PVR5&wzJiwUz+)SgX$7;8V@Y1O@!^MPa zi+gvtm_+THits<+w(Ho9W1Kp-CuO1z(+HjLZL5#Hq4nnP8nGdR6$9-TW%M%7J^r@i z3u-;;yi`GW+@KNl-0jyZESeC$6WviN*Zk<*yYd6Kc9b5?`03d&G&7gecUv|Nd9HlV z)~0}0%+FH3!`ZRTpqx!Pi}n7m>5}RO!I%|WqBY%vWFcGonE?q66m 
z>FIYN(vxjfXp}ZcL@*cz5lTrhVG-l#0WzP7Wyd*wj7kktk0%|5hN(F9=9O2tbIzc7 z0?!fzyd@RWMmnl@S|fP8I^#FK&l5K+$;!{?KrFb;h`a2TfYMZ5q+98B{Zyfke5zC` z6q(~(;}#^m;?(%5R3^GzFKNXnx1&8Xw4LC z^i(w%*;gFRsgZ@Z7k$*SiD*M;yH-c+O1 zjg2E0h!G}Z*Pi&y>{dU@X}Dt#rL*=fvoTDru}} zj8&N7XmG({zaptkiurCP_Wh9HrW6#-!PKF*vwiar3>!JVx@&kaGe}-iyF-q3T@r$o zgv`m=1W1&(rJvwjuLoxvTFm)`nK-J*zexVZDdjr6uN88QgOWZJ+d1e#D-6$vRrh4{ z!qe>WEpi;qSLdOcAPNs$+uH$$Fb#PG9K_E-o|K<{#)n|i?mup&CCn+rRmO+U@U9I& zH1skv!laNw1QQcX{g^)8OR4#gHwdb6#N>zBH|3*x|FBgkk|mPKg>at%aIroZO*cUV z5)4^zh)!!YGs~PRi$H@wYgu3fep8k1H@+)Jah7PIp#Vm; zIHWy8gKHD|T^jDdtKrT{*6;f_4*un-fBtZT&nJBhm-zh;5<~}_bb!s`>O*Z3{qZoA z-A=$KQUQ_Sl;u11tE@Mr71!i9Ou9XR8gwaEyc8hOJct#tX_%RvPO<~qRd1}qj^|fa z-d2;H>p+iO>#V4-Jwwwg@aWIqNdBA}{3YpJ5FdTr$kv+$2F7P>yXLPi z-H2@78P}4CJ{Hjqze{AlMu2K5?wSbwv5WrgGw$pl)vN^Za&#)l(0WC4~JARZIRZ6$+ zF7KN(%R2f=W1yNxf%yY4bD&$+_Ct-Dkzf~Eq7P?+L26LA%`^fEm65fTdX(nY2Sx6M zhCEC=9b6Y)te_281~ZBX!iaz^b(?BAG0_Pf354M37=VfOkgzL z%ueNgnccL-0z~8`<5VGt)xM~XGS2yp@6+VX{VUL~BPD2UocGzzt`7#Ru#i^UznXU$g(e_UEHtq|R@WJKW|$%r)ZNy0&dn@u|(C#e?N|SDzzNg$;D2kG2J#(VEeK zOs3_i+MJZgZ#`o))a1PreY1gM0{bp?#AmPgI97J8oIU7ZfZ*({dzV%)6C>jEzLVX1w*>};oiEtFE*sPITEj5=ucLXd&^3l{ zK7@J<#GO&}x#izuX;P*6?|i|V5;iE`T-)YD8Q-?S>JY;FW}a^3^YJ+uc-XC%Idq{2!ZIRz`#z;Yh4KUsAjC;hekz<9|) zQuP8Ej!tY6%G-@+dREzmSuN%vNF-Dz+!Blnk&u_LxnyT@Ulv6V>{&CR>&=&zaZ12Q z`$TAt8;xo~_ym-rteoTP)#f@In{+E1kGO49$8@mfx40T(-Zwi z{j2Z&J!wvCl9k_pUFQ|*GHkPw9BLwd|J@gLx;5CGxu#Ee4t<cJxOjSK$rz>0sS&iNg0cv+U7G96x*Q+{z5%YuPY6p^JRP`#VxYH%Su;mGaRMIiE3>FLP!gz(La)e&q16g`Xg!RE#E)IZ z1he{?J6mrGv^M?RVGJLE6w81qyI?H9FmwgFaffjCajmybLU0{U%s&A6Jc=Ax9l2U>f!M3 zq&L6g_tz;eOPJn;HPCQJfbq-;mwCMN)XR9XA`R=z0arU1aI$r$M3M zUJOh1s%@IIE)FSllM_0~wQ0u&#r@!(&y$@!rDaq%3hgssbe3drRdE=tD7#ka;p=`z ziqAUVIV=HmCAN?Hu>3bIf@6VY#XHMeyudGybDY$g^&ZRSd7ZBqxqWpTO>kZ1Vt%3# z{#K$pdP=m9^YZl@Nt1@x^+@XZYXrgb`Z9)F)h(rM;-2oG?V#A#yD5){a{q?192I*# z;~Obe!|Ki~JmH{j&5_DacNDV*h{@h*k~9sFdpL^zFZXiX(*Yp@2DrCL1tTPMzb- zl|oocK~uND6+xREe` z-sJXw_>J#;VvHqo+tUdU2=W|XU;txM^fP5D6?B+*gkL@iMuDW_l-EG2wSf6jTd6JF 
z00fsgU-f%5fm0M1oc=71)mVa5NY-c3gQ|ol`o>!g1Cjt&ZCuv0r0sG@i4G}p@ll^;{()G z;|Hd!;czIPD<&mGz_QFRe%Js5T$`J zMaqzB*XHfANCf=89k04phx7u1LqPClOMt;ne@jKLD=C)O{uh_w|2Sd)`@+G7D{>^B z6uF$2J{Ir|`amHv_S4^6rp=Be>dl+AMC|zLd+)Jl=-4&tUon5{G8h&|tZN()#wfYH zAF4ZJ^m&>VeAyZf=l2N#3LjR z#(0eR8dtUWAl#@(qVYoC&08MmDlRKB6%`{K=yW!tut2dwA1q!Z`1?3;8Y}USKhQtq z7iXjK?S$)?ib*~KH?YB8;7&W@zH zIVF2D0CNl|Vgq%tA0HK7df7u7a=|eqYbi@=l(LmbPX7=j1y*NRBjc5kN5(E%OE>n* zg*$9kd0f&mZ%4B9xo znFk}khojWbUMv~7qH6%lhnF))(#Kkuwpsf^@gfY^o=awhWZOjYfjuTVHH?$YhDg z8c7-&KpT(ksi7tLhGwtjW2m#PaF|{LU^KdF zg_;&MI-8~CB8mzoQ$BElCailH-%2aNELX6hDl;)YQ<=^QTlJPZn)gDQ?^a9S)Q;v? za@D*Pw^BpmbD#%vEX%09>1ZS;c5Isr|P1NCpcp@xeK4g5_3NwW38c*KMp_aQE%dS3)$>XX*-pVqV; zJk=Z8*Q6W}j))Oxuc)2u;U3S3s4dmi&(dK_T@XMNJ=*rw{WxL>V^?_tYhDbCSSF2Jyrc!&6mP!&K!zPmvO}z`=x6NmgT2l zG`-5R;=hfce}@wfU>i$5SC(G7x;*mnEWO9~S-|LuX|T8EeNjWvoToiR84MwCi-AZk_w_y&w1g@cr%m`}1*K@5lRnU9Z>6bE?A9wef=Q9){I=G;C?MehLAOxLfNJ4KM~l9*ZnL)#{{S&n$9Mn$s7)0NMm4T8RoZRrvOxxJi;T?9 zUkCqWg^gQI^Uo5X(rCt8O*7*g4H$bs12$B+C-MNfhW$qepI*YI4do#LMo=~M0)`D! zH-}GcTxj(1qb!XPpx!QT|HHT0?mP60&9y%L`iP7(duq>^Jvg33wRtx9nsHb9@&qL` zPHo;tu58Y)JTUemm~U|ZzEjgo?PqA^E2W<2Z#GSayn3@gEG}6uE!N7a&|30DDKasI zJ3W%nd8*t~&E3TpY>LDh?tS&Dm4CS>y)(bAD?JJ0 z1xqyWr4=;kXL_h`zEu6U<)Ya2Slri z<4(4=y=*`prl7V%3>f~DbVmr5%BJx*L%@(VQ76Gr>4^?~H3IJWF8bIy6fzb(2Y|8AV?LNzu4m24;| zDwOEbq$iQaj^^q}IEiZ8f;mcE21bZZs3IwfLIx#3H6hBrW%6z<1F%F@f-{n_(-ZnS zOTq9!mulTqC1-n-Z-g5i-3QmB#n5cyY>1_Vcc3b@=;2%uQ@}A)MIsRvI1L#2U7Dr| zxs9d_rKf%{(I?U{a4IR1^ONt7^y7A4L2LS>me|>|AP^;m)QwW{#2n+WV|JWl{2L`D`hMmZDQ)pksU<|?!U|Sg>H#Upm@6KC9!#=u>J~%v=oU*@u*nMuqaTaSD? 
zKvtLs-+Pj!UA4WWDpT*s`!HPo)bj&dTXPS6p3gt;6Y@`gUR$aohdbs`OxD*?H#_4f zODu14iE;t?jd&Yn7xWMU3-tKHmQhLqu?Vp$GzH7@npJDX1&|fN6uR~4gBX)(J=}4O zAvS9+A&?VB=0&#_I=W=;20_8XPZl5qznjy->;tVZ;=1x<8i~hRGh;JWuHPRxY5=s1 zMK0PMQ(ek}Ry`_<)*|GIBfB|#tNqhL z9G*r>P-w!S5kpYarXOU`R72BGzF$*6+I_LjZz;S3H$4hY5+|SSI z(E!>zu{AgD6VLjgRiKUK-)*}yad)$`J%}%V0sR*K4I2_tX-9vL{gQpggJbHuiYe)9 zPorHdeEIqI0ww5P-40UroXL#e>s!Siy_Yi2Zrl+NnVi>8C;aTsYkXDsj`}GDyYvY) zEc5wevsqwqDSqe*+$lvuHQ~yh=*a}Bs5k)~+$n@xb933~`$coA+}y8lvz#&&!P~## z;RDzk_(Y!}saj)hURC$kjCa{*;L0V6!T7BJMs z?Hq8ihNoufIW_SXo#^hky*mEY-9tnbEaIuZDFCQajcxc2yQ92ZDh$TU`fqa;Dr;*8 z$qmShSBw{7?rWK0Zs4eqc}$^CUZS1P@BiWZ-ELXw2ek~=n-Fil1O`PlJt-T0)H+QI zj~Q4N60moW9wsy8JD(cEFHM%{yEu71TWR|u-r&t*s4l0Kd;@mB8Nl(H^X&j#)RmE2 z=GVLjNLST{NubnGt0R{5T5$h`T`8Q9bn?c6mwPN$w1b>BmeKcwgR|~v%S;{xSzw+& zDr~=6Th*;L3~t9(Je}zRVP4ZW*?o8P`cj_Jzi(3$FO; z%uV0_*Or;4XPG*W*X#XyExa+H<}jr%!Q)9^q3zYP0nK$n!*cQWpM3vkzK>^NIZ29~ z%Nz~m+Eru=`to6DH&^{&WU0cI{1AKgKA}mbFT_6jlhkC)xa1x zI=iNd=-H=Tsc5mF%og|6a@;sI2GtGCI}|P!o8^b|NQ`9LkI#Vndka zUTFYX%XpPxoO517pU5IQ)SphZFL&4#pt;d$>idQIYr9otGdt6>e~sXILnSSHM_>&Cg)KQ?ruURkh-e$9*rLP?HrD=)17X?lawU8MPvMN6oanPZY^G+ufzq zO)#sANPi$N)dp{QpE|pl`Rx$li{{5nOgMPYxjvhd&)+Z%+k3Qh)p+g*5}HO$KNQwq z8RIaJYpBK5G;S)UY%Z5se*sI*+`UkAHGn?__z6~%kap?U$F9FT^WxrPT42&J2hcjy zA)|er4JwqM+PRfz1iR3`I3;=eC~VbY&oNP}t!mhR>(}3(uR!6Wq}x~2kvlc5q;&fG z!?`8bklE~)QW*cPVhj5dl1j!(;X5{!**{5+x}PR33b%^{0%P{~xR>`eDnoL7Vi@a@ zFc!{do9gUe4iHyK9@wncR)rGC=yif15UQcvbD)5*&<&IcR82ajGz=#0Pk z){1wovE+*;yXjkEBl^6F7Q)lWxQE*z5gdKH<)N3!aO3_;3(44gF6{Wld(Y*GCmIH8 z%-`e3@vsr@jg;U!k1q4}{TdX$b^7U^HEozp=XAk?O+mil`*hPXPWXu1b6Q>FKvSFh z?EcCZ>!#m7?aqDM>(}|JtKi_g*Ojar4ugqTULG3traD= z0WDW*u*YvSU(5#)8y$*$r&0QJCu(){d^`Gd~)ven|@}o@TL69+0HK?iW{LsTC zAd+WyP#_#L{s>rGx+guyF;D>yDw>_ z2@ICb0FoDkyo`m+n)vr$GRS^5qhsl8{A6mFO z(r-QQ^=iYffrgn4#XHzco$Dh3)6bVrZFFG>4iksB2?Q+}epcO5Tf_l;APr=i1Gq?E}hizC$K zLP`b>lBdT4ABF0IdUEn0%sy4&^SW`kA3C-Jj%-QO&4uIFyb=(}k(MrQI~7Q`4vvD! 
zn9Wgd8tfTx*X%LrrF`7Rg21)Npt}*lLJ$G}Y=#tTn~^i;AH(C`pE^^id(LaVcwgW`u zfE9;^RJoxhG_=g}ZV>11X2MZQ7cr=Q>@kD~28hW1HN6#I_)QVngD zeLJ3($MS+6pnRJfbxy;mD$suP>e#UbhC}_BV=7TVruQ-4qe_Yu5?=&x?O6<1rTmfP z&jj7Fc=s?siXa%CXeDl{^lg81lR|v9LO1y^Zdnd#pFMno+?r+V@z*2*Jz0HySOeNe#+ zVGK^&j8jBH$tb^m=25eC3Whb!@z=X9ACxqaZ<8UGru6XQYoMkNGI^rI!^L%^$RX@e z7mQt-Fri4+zcg_TMra7!i?5zfBw$1;$7hZYN?TIf`8oci`%%uro(VIxi*|uR;GGNMQV3tw2OCR6v`THLednisdjS!?~}pRc%7!3Y!t=` z10x~rAB2T96H0x&YqYPC-%s3XRN+r~nXrNSDg>GA-NJ5?WH}mog4WXDfJH)Q&2ovm z4+4r*^WMAb!wn&N#rhwAhR>;fcFjslZOtI+M3u9kL|}g1UkApn_ybpk-eG7K8^-fj zQmO-?1-G3PTU=?llj$ekaqt}9#^4);31gGa3 Date: Wed, 10 Dec 2025 22:15:40 -0500 Subject: [PATCH 50/59] Updated temp file location to use flock storage directory instead of system temp --- .../providers/handlers/url_handler.hpp | 23 +++++++++++++++---- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/src/include/flock/model_manager/providers/handlers/url_handler.hpp b/src/include/flock/model_manager/providers/handlers/url_handler.hpp index e0944f66..714967d5 100644 --- a/src/include/flock/model_manager/providers/handlers/url_handler.hpp +++ b/src/include/flock/model_manager/providers/handlers/url_handler.hpp @@ -1,8 +1,10 @@ #pragma once #include "flock/core/common.hpp" +#include "flock/core/config.hpp" #include #include +#include #include #include #include @@ -29,16 +31,27 @@ class URLHandler { // Generate a unique temporary filename with extension static std::string GenerateTempFilename(const std::string& extension) { + // Get the flock storage directory (parent of the database file) + std::filesystem::path storage_dir = Config::get_global_storage_path().parent_path(); + + // Ensure the directory exists + if (!std::filesystem::exists(storage_dir)) { + std::filesystem::create_directories(storage_dir); + } + std::random_device rd; std::mt19937 gen(rd()); std::uniform_int_distribution<> dis(0, 15); - std::ostringstream oss; - oss << "/tmp/flock_"; + std::ostringstream filename; + filename << "flock_"; 
for (int i = 0; i < 16; ++i) { - oss << std::hex << dis(gen); + filename << std::hex << dis(gen); } - oss << extension; - return oss.str(); + filename << extension; + + // Use filesystem path for proper cross-platform path handling + std::filesystem::path temp_path = storage_dir / filename.str(); + return temp_path.string(); } // Check if the given path is a URL using regex From 9095a0760335f4e2c74af42439cf6963e7c578b6 Mon Sep 17 00:00:00 2001 From: Anas Dorbani Date: Thu, 11 Dec 2025 11:13:16 -0500 Subject: [PATCH 51/59] Fix llm_filter to work without context_columns parameter --- .../scalar/llm_filter/implementation.cpp | 28 +++++++++++++------ 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/src/functions/scalar/llm_filter/implementation.cpp b/src/functions/scalar/llm_filter/implementation.cpp index bd6c8189..360073f6 100644 --- a/src/functions/scalar/llm_filter/implementation.cpp +++ b/src/functions/scalar/llm_filter/implementation.cpp @@ -7,7 +7,7 @@ namespace flock { void LlmFilter::ValidateArguments(duckdb::DataChunk& args) { - if (args.ColumnCount() != 3) { + if (args.ColumnCount() < 2 || args.ColumnCount() > 3) { throw std::runtime_error("Invalid number of arguments."); } @@ -18,7 +18,7 @@ void LlmFilter::ValidateArguments(duckdb::DataChunk& args) { throw std::runtime_error("Prompt details must be a struct."); } - if (args.data[2].GetType().id() != duckdb::LogicalTypeId::STRUCT) { + if (args.ColumnCount() == 3 && args.data[2].GetType().id() != duckdb::LogicalTypeId::STRUCT) { throw std::runtime_error("Inputs must be a struct."); } } @@ -41,16 +41,28 @@ std::vector LlmFilter::Operation(duckdb::DataChunk& args) { } auto prompt_details = PromptManager::CreatePromptDetails(prompt_context_json); - auto responses = BatchAndComplete(context_columns, prompt_details.prompt, ScalarFunctionType::FILTER, model); - std::vector results; - results.reserve(responses.size()); - for (const auto& response: responses) { + if (context_columns.empty()) { + // 
Simple filter without per-row context. Ask once and return the boolean result. + model.AddCompletionRequest(prompt_details.prompt, 1, OutputType::BOOL); + auto response = model.CollectCompletions()[0]["items"][0]; + if (response.is_null()) { results.emplace_back("true"); - continue; + } else { + results.emplace_back(response.dump()); + } + } else { + auto responses = BatchAndComplete(context_columns, prompt_details.prompt, ScalarFunctionType::FILTER, model); + + results.reserve(responses.size()); + for (const auto& response: responses) { + if (response.is_null()) { + results.emplace_back("true"); + continue; + } + results.push_back(response.dump()); } - results.push_back(response.dump()); } return results; From 25036f14234add9758d992545506f362d69a6361 Mon Sep 17 00:00:00 2001 From: Anas Dorbani Date: Thu, 11 Dec 2025 11:20:10 -0500 Subject: [PATCH 52/59] Add unit and integration tests for llm_filter without context_columns --- .../tests/functions/scalar/test_llm_filter.py | 53 +++++++++++++++---- test/unit/functions/scalar/llm_filter.cpp | 13 +++++ 2 files changed, 55 insertions(+), 11 deletions(-) diff --git a/test/integration/src/integration/tests/functions/scalar/test_llm_filter.py b/test/integration/src/integration/tests/functions/scalar/test_llm_filter.py index b3c229e1..ba1ed273 100644 --- a/test/integration/src/integration/tests/functions/scalar/test_llm_filter.py +++ b/test/integration/src/integration/tests/functions/scalar/test_llm_filter.py @@ -55,7 +55,7 @@ def test_llm_filter_basic_functionality(integration_setup, model_config): {'model_name': '""" + test_model_name + """'}, - {'prompt': 'Is this text positive? 
Answer true or false.', 'context_columns': [{'data': text}]} + {'prompt': 'Is this text positive?', 'context_columns': [{'data': text}]} ) AS is_positive FROM test_data WHERE id = 1; @@ -68,6 +68,33 @@ def test_llm_filter_basic_functionality(integration_setup, model_config): assert "is_positive" in result.stdout.lower() +def test_llm_filter_without_context_columns(integration_setup, model_config): + """Test llm_filter without context_columns parameter.""" + duckdb_cli_path, db_path = integration_setup + model_name, provider = model_config + + test_model_name = f"test-filter-no-context_{model_name}" + create_model_query = ( + f"CREATE MODEL('{test_model_name}', '{model_name}', '{provider}');" + ) + run_cli(duckdb_cli_path, db_path, create_model_query, with_secrets=False) + + query = ( + """ + SELECT llm_filter( + {'model_name': '""" + + test_model_name + + """'}, + {'prompt': 'Is paris the best capital in the world?'} + ) AS filter_result; + """ + ) + result = run_cli(duckdb_cli_path, db_path, query) + + assert result.returncode == 0, f"Query failed with error: {result.stderr}" + assert "true" in result.stdout.lower() or "false" in result.stdout.lower() + + def test_llm_filter_batch_processing(integration_setup, model_config): duckdb_cli_path, db_path = integration_setup model_name, provider = model_config @@ -106,7 +133,7 @@ def test_llm_filter_batch_processing(integration_setup, model_config): {'model_name': '""" + test_model_name + """', 'batch_size': 2}, - {'prompt': 'Is this item technology-related? Answer true or false.', 'context_columns': [{'data': text}]} + {'prompt': 'Is this item technology-related?', 'context_columns': [{'data': text}]} ) AS is_tech FROM test_items; """ @@ -226,7 +253,7 @@ def test_llm_filter_with_special_characters(integration_setup, model_config): {'model_name': '""" + test_model_name + """'}, - {'prompt': 'Does this text contain non-ASCII characters? 
Answer true or false.', 'context_columns': [{'data': text}]} + {'prompt': 'Does this text contain non-ASCII characters?', 'context_columns': [{'data': text}]} ) AS has_unicode FROM special_text WHERE id = 1; @@ -270,7 +297,7 @@ def test_llm_filter_with_model_params(integration_setup, model_config): {'model_name': '""" + test_model_name + """', 'tuple_format': 'Markdown', 'batch_size': 1, 'model_parameters': '{"temperature": 0}'}, - {'prompt': 'Is this text expressing positive sentiment? Answer true or false only.', 'context_columns': [{'data': text}]} + {'prompt': 'Is this text expressing positive sentiment?', 'context_columns': [{'data': text}]} ) AS is_positive FROM test_data; """ @@ -399,7 +426,7 @@ def _test_llm_filter_performance_large_dataset(integration_setup, model_config): {'model_name': '""" + test_model_name + """', 'batch_size': 5}, - {'prompt': 'Does this content contain the word "item"? Answer true or false.', 'context_columns': [{'data': content}]} + {'prompt': 'Does this content contain the word "item"?', 'context_columns': [{'data': content}]} ) AS filter_result FROM large_content LIMIT 10; @@ -467,7 +494,7 @@ def test_llm_filter_with_image_integration(integration_setup, model_config_image + test_model_name + """'}, { - 'prompt': 'Is this image showing a motorized vehicle? Answer true or false.', + 'prompt': 'Is this image showing a motorized vehicle?', 'context_columns': [ {'data': vehicle_type}, {'data': image_url, 'type': 'image'} @@ -538,7 +565,7 @@ def test_llm_filter_image_batch_processing(integration_setup, model_config_image + test_model_name + """'}, { - 'prompt': 'Does this food image look appetizing and well-presented? 
Answer true or false.', + 'prompt': 'Does this food image look appetizing and well-presented?', 'context_columns': [ {'data': food_name}, {'data': image_url, 'type': 'image'} @@ -614,7 +641,7 @@ def test_llm_filter_image_with_text_context(integration_setup, model_config_imag + test_model_name + """'}, { - 'prompt': 'Based on the image and the season/price information, is this clothing item appropriate for its intended season and price range? Answer true or false.', + 'prompt': 'Based on the image and the season/price information, is this clothing item appropriate for its intended season and price range?', 'context_columns': [ {'data': item_name}, {'data': image_url, 'type': 'image'}, @@ -652,7 +679,9 @@ def test_llm_filter_with_audio_transcription(integration_setup, model_config): transcription_model_name = f"test-transcription-filter_{model_name}" create_transcription_model_query = f"CREATE MODEL('{transcription_model_name}', 'gpt-4o-mini-transcribe', 'openai');" - run_cli(duckdb_cli_path, db_path, create_transcription_model_query, with_secrets=False) + run_cli( + duckdb_cli_path, db_path, create_transcription_model_query, with_secrets=False + ) # Get audio file path audio_path = get_audio_file_path() @@ -665,7 +694,7 @@ def test_llm_filter_with_audio_transcription(integration_setup, model_config): + test_model_name + """'}, { - 'prompt': 'Does this audio mention DuckDB or databases? 
Answer true or false.', + 'prompt': 'Does this audio mention DuckDB or databases?', 'context_columns': [ { 'data': audio_path, @@ -706,7 +735,9 @@ def test_llm_filter_audio_ollama_error(integration_setup): create_transcription_model_query = ( "CREATE MODEL('test-ollama-filter-transcription', 'gemma3:1b', 'ollama');" ) - run_cli(duckdb_cli_path, db_path, create_transcription_model_query, with_secrets=False) + run_cli( + duckdb_cli_path, db_path, create_transcription_model_query, with_secrets=False + ) query = """ SELECT llm_filter( diff --git a/test/unit/functions/scalar/llm_filter.cpp b/test/unit/functions/scalar/llm_filter.cpp index df8a7754..77001834 100644 --- a/test/unit/functions/scalar/llm_filter.cpp +++ b/test/unit/functions/scalar/llm_filter.cpp @@ -62,6 +62,19 @@ TEST_F(LLMFilterTest, LLMFilterBasicUsage) { ASSERT_EQ(results->GetValue(0, 0).GetValue(), "true"); } +TEST_F(LLMFilterTest, LLMFilterWithoutContextColumns) { + const nlohmann::json expected_response = {{"items", {true}}}; + EXPECT_CALL(*mock_provider, AddCompletionRequest(::testing::_, ::testing::_, ::testing::_, ::testing::_)) + .Times(1); + EXPECT_CALL(*mock_provider, CollectCompletions(::testing::_)) + .WillOnce(::testing::Return(std::vector{expected_response})); + + auto con = Config::GetConnection(); + const auto results = con.Query("SELECT " + GetFunctionName() + "({'model_name': 'gpt-4o'}, {'prompt': 'Is paris the best capital in the world?'}) AS filter_result;"); + ASSERT_EQ(results->RowCount(), 1); + ASSERT_EQ(results->GetValue(0, 0).GetValue(), "true"); +} + TEST_F(LLMFilterTest, LLMFilterWithMultipleRows) { const nlohmann::json expected_response = {{"items", {true}}}; EXPECT_CALL(*mock_provider, AddCompletionRequest(::testing::_, ::testing::_, ::testing::_, ::testing::_)) From 96d17946b794ae23a5e90351d31a676549fe219c Mon Sep 17 00:00:00 2001 From: Anas Dorbani Date: Sun, 14 Dec 2025 18:01:54 -0500 Subject: [PATCH 53/59] Refactored storage attachment with RAII guard and retry mechanism 
--- src/core/config/config.cpp | 60 +++++- src/custom_parser/query/model_parser.cpp | 186 ++++++++++-------- src/custom_parser/query/prompt_parser.cpp | 164 ++++++++------- src/custom_parser/query_parser.cpp | 90 +++++++++ src/include/flock/core/config.hpp | 23 +++ .../flock/custom_parser/query_parser.hpp | 15 ++ src/model_manager/model.cpp | 1 + src/prompt_manager/prompt_manager.cpp | 1 + 8 files changed, 386 insertions(+), 154 deletions(-) diff --git a/src/core/config/config.cpp b/src/core/config/config.cpp index 743c3690..e36eac90 100644 --- a/src/core/config/config.cpp +++ b/src/core/config/config.cpp @@ -1,6 +1,7 @@ #include "flock/core/config.hpp" #include "filesystem.hpp" #include "flock/secret_manager/secret_manager.hpp" +#include #include namespace flock { @@ -65,8 +66,6 @@ void Config::ConfigureGlobal() { void Config::ConfigureLocal(duckdb::DatabaseInstance& db) { auto con = Config::GetConnection(&db); ConfigureTables(con, ConfigType::LOCAL); - con.Query( - duckdb_fmt::format("ATTACH DATABASE '{}' AS flock_storage;", Config::get_global_storage_path().string())); } void Config::ConfigureTables(duckdb::Connection& con, const ConfigType type) { @@ -89,4 +88,61 @@ void Config::Configure(duckdb::ExtensionLoader& loader) { } } +void Config::AttachToGlobalStorage(duckdb::Connection& con, bool read_only) { + con.Query(duckdb_fmt::format("ATTACH DATABASE '{}' AS flock_storage {};", + Config::get_global_storage_path().string(), read_only ? 
"(READ_ONLY)" : "")); +} + +void Config::DetachFromGlobalStorage(duckdb::Connection& con) { + con.Query("DETACH DATABASE flock_storage;"); +} + +bool Config::StorageAttachmentGuard::TryAttach(bool read_only) { + try { + Config::AttachToGlobalStorage(connection, read_only); + return true; + } catch (const std::exception&) { + return false; + } +} + +bool Config::StorageAttachmentGuard::TryDetach() { + try { + Config::DetachFromGlobalStorage(connection); + return true; + } catch (const std::exception&) { + return false; + } +} + +void Config::StorageAttachmentGuard::Wait(int milliseconds) { + auto start = std::chrono::steady_clock::now(); + auto duration = std::chrono::milliseconds(milliseconds); + while (std::chrono::steady_clock::now() - start < duration) { + // Busy-wait until the specified duration has elapsed + } +} + +Config::StorageAttachmentGuard::StorageAttachmentGuard(duckdb::Connection& con, bool read_only) + : connection(con), attached(false) { + for (int attempt = 0; attempt < MAX_RETRIES; ++attempt) { + if (TryAttach(read_only)) { + attached = true; + return; + } + Wait(RETRY_DELAY_MS); + } + Config::AttachToGlobalStorage(connection, read_only); + attached = true; +} + +Config::StorageAttachmentGuard::~StorageAttachmentGuard() { + if (attached) { + try { + Config::DetachFromGlobalStorage(connection); + } catch (...) 
{ + } + } +} + }// namespace flock diff --git a/src/custom_parser/query/model_parser.cpp b/src/custom_parser/query/model_parser.cpp index 89918b94..7d422834 100644 --- a/src/custom_parser/query/model_parser.cpp +++ b/src/custom_parser/query/model_parser.cpp @@ -2,6 +2,7 @@ #include "flock/core/common.hpp" #include "flock/core/config.hpp" +#include "flock/custom_parser/query_parser.hpp" #include #include @@ -303,101 +304,120 @@ std::string ModelParser::ToSQL(const QueryStatement& statement) const { switch (statement.type) { case StatementType::CREATE_MODEL: { const auto& create_stmt = static_cast(statement); - auto con = Config::GetConnection(); - auto result = con.Query(duckdb_fmt::format( - " SELECT model_name" - " FROM flock_storage.flock_config.FLOCKMTL_MODEL_DEFAULT_INTERNAL_TABLE" - " WHERE model_name = '{}'" - " UNION ALL " - " SELECT model_name " - " FROM {}flock_config.FLOCKMTL_MODEL_USER_DEFINED_INTERNAL_TABLE" - " WHERE model_name = '{}';", - create_stmt.model_name, create_stmt.catalog.empty() ? "flock_storage." : "", create_stmt.model_name)); - if (result->RowCount() != 0) { - throw std::runtime_error(duckdb_fmt::format("Model '{}' already exist.", create_stmt.model_name)); - } + query = ExecuteQueryWithStorage([&create_stmt](duckdb::Connection& con) { + // Check if model already exists + auto result = con.Query(duckdb_fmt::format( + " SELECT model_name" + " FROM flock_storage.flock_config.FLOCKMTL_MODEL_DEFAULT_INTERNAL_TABLE" + " WHERE model_name = '{}'" + " UNION ALL " + " SELECT model_name " + " FROM {}flock_config.FLOCKMTL_MODEL_USER_DEFINED_INTERNAL_TABLE" + " WHERE model_name = '{}';", + create_stmt.model_name, create_stmt.catalog.empty() ? "flock_storage." 
: "", create_stmt.model_name)); + + auto& materialized_result = result->Cast(); + if (materialized_result.RowCount() != 0) { + throw std::runtime_error(duckdb_fmt::format("Model '{}' already exist.", create_stmt.model_name)); + } - query = duckdb_fmt::format(" INSERT INTO " - " {}flock_config.FLOCKMTL_MODEL_USER_DEFINED_INTERNAL_TABLE " - " (model_name, model, provider_name, model_args) " - " VALUES ('{}', '{}', '{}', '{}');", - create_stmt.catalog, create_stmt.model_name, create_stmt.model, - create_stmt.provider_name, create_stmt.model_args.dump()); + // Insert the new model + auto insert_query = duckdb_fmt::format(" INSERT INTO " + " {}flock_config.FLOCKMTL_MODEL_USER_DEFINED_INTERNAL_TABLE " + " (model_name, model, provider_name, model_args) " + " VALUES ('{}', '{}', '{}', '{}');", + create_stmt.catalog, create_stmt.model_name, create_stmt.model, + create_stmt.provider_name, create_stmt.model_args.dump()); + con.Query(insert_query); + + return std::string("SELECT 'Model created successfully' AS status"); + }, + false); break; } case StatementType::DELETE_MODEL: { const auto& delete_stmt = static_cast(statement); - auto con = Config::GetConnection(); - - con.Query(duckdb_fmt::format(" DELETE FROM flock_config.FLOCKMTL_MODEL_USER_DEFINED_INTERNAL_TABLE " - " WHERE model_name = '{}';", - delete_stmt.model_name)); - - query = duckdb_fmt::format(" DELETE FROM " + query = ExecuteSetQuery( + duckdb_fmt::format(" DELETE FROM flock_config.FLOCKMTL_MODEL_USER_DEFINED_INTERNAL_TABLE " + " WHERE model_name = '{}'; " + " DELETE FROM " " flock_storage.flock_config.FLOCKMTL_MODEL_USER_DEFINED_INTERNAL_TABLE " " WHERE model_name = '{}';", - delete_stmt.model_name, delete_stmt.model_name); + delete_stmt.model_name, delete_stmt.model_name), + "Model deleted successfully", + false); break; } case StatementType::UPDATE_MODEL: { const auto& update_stmt = static_cast(statement); - auto con = Config::GetConnection(); - // get the location of the model_name if local or global - auto 
result = con.Query( - duckdb_fmt::format(" SELECT model_name, 'global' AS scope " - " FROM flock_storage.flock_config.FLOCKMTL_MODEL_USER_DEFINED_INTERNAL_TABLE" - " WHERE model_name = '{}'" - " UNION ALL " - " SELECT model_name, 'local' AS scope " - " FROM flock_config.FLOCKMTL_MODEL_USER_DEFINED_INTERNAL_TABLE" - " WHERE model_name = '{}';", - update_stmt.model_name, update_stmt.model_name, update_stmt.model_name)); + query = ExecuteQueryWithStorage([&update_stmt](duckdb::Connection& con) { + // Get the location of the model_name if local or global + auto result = con.Query( + duckdb_fmt::format(" SELECT model_name, 'global' AS scope " + " FROM flock_storage.flock_config.FLOCKMTL_MODEL_USER_DEFINED_INTERNAL_TABLE" + " WHERE model_name = '{}'" + " UNION ALL " + " SELECT model_name, 'local' AS scope " + " FROM flock_config.FLOCKMTL_MODEL_USER_DEFINED_INTERNAL_TABLE" + " WHERE model_name = '{}';", + update_stmt.model_name, update_stmt.model_name, update_stmt.model_name)); + + auto& materialized_result = result->Cast(); + if (materialized_result.RowCount() == 0) { + throw std::runtime_error(duckdb_fmt::format("Model '{}' doesn't exist.", update_stmt.model_name)); + } - if (result->RowCount() == 0) { - throw std::runtime_error(duckdb_fmt::format("Model '{}' doesn't exist.", update_stmt.model_name)); - } + auto catalog = materialized_result.GetValue(1, 0).ToString() == "global" ? "flock_storage." : ""; - auto catalog = result->GetValue(1, 0).ToString() == "global" ? "flock_storage." 
: ""; + con.Query(duckdb_fmt::format(" UPDATE {}flock_config.FLOCKMTL_MODEL_USER_DEFINED_INTERNAL_TABLE " + " SET model = '{}', provider_name = '{}', " + " model_args = '{}' WHERE model_name = '{}'; ", + catalog, update_stmt.new_model, update_stmt.provider_name, + update_stmt.new_model_args.dump(), update_stmt.model_name)); - query = duckdb_fmt::format(" UPDATE {}flock_config.FLOCKMTL_MODEL_USER_DEFINED_INTERNAL_TABLE " - " SET model = '{}', provider_name = '{}', " - " model_args = '{}' WHERE model_name = '{}'; ", - catalog, update_stmt.new_model, update_stmt.provider_name, - update_stmt.new_model_args.dump(), update_stmt.model_name); + return std::string("SELECT 'Model updated successfully' AS status"); + }, + false); break; } case StatementType::UPDATE_MODEL_SCOPE: { const auto& update_stmt = static_cast(statement); - auto con = Config::GetConnection(); - auto result = - con.Query(duckdb_fmt::format(" SELECT model_name " - " FROM {}flock_config.FLOCKMTL_MODEL_USER_DEFINED_INTERNAL_TABLE" - " WHERE model_name = '{}';", - update_stmt.catalog, update_stmt.model_name)); - if (result->RowCount() != 0) { - throw std::runtime_error( - duckdb_fmt::format("Model '{}' already exist in {} storage.", update_stmt.model_name, - update_stmt.catalog == "flock_storage." ? "global" : "local")); - } + query = ExecuteQueryWithStorage([&update_stmt](duckdb::Connection& con) { + auto result = con.Query(duckdb_fmt::format(" SELECT model_name " + " FROM {}flock_config.FLOCKMTL_MODEL_USER_DEFINED_INTERNAL_TABLE" + " WHERE model_name = '{}';", + update_stmt.catalog, update_stmt.model_name)); + + auto& materialized_result = result->Cast(); + if (materialized_result.RowCount() != 0) { + throw std::runtime_error( + duckdb_fmt::format("Model '{}' already exist in {} storage.", update_stmt.model_name, + update_stmt.catalog == "flock_storage." ? 
"global" : "local")); + } - con.Query(duckdb_fmt::format("INSERT INTO {}flock_config.FLOCKMTL_MODEL_USER_DEFINED_INTERNAL_TABLE " - "(model_name, model, provider_name, model_args) " - "SELECT model_name, model, provider_name, model_args " - "FROM {}flock_config.FLOCKMTL_MODEL_USER_DEFINED_INTERNAL_TABLE " - "WHERE model_name = '{}'; ", - update_stmt.catalog, - update_stmt.catalog == "flock_storage." ? "" : "flock_storage.", - update_stmt.model_name)); - - query = duckdb_fmt::format("DELETE FROM {}flock_config.FLOCKMTL_MODEL_USER_DEFINED_INTERNAL_TABLE " - "WHERE model_name = '{}'; ", - update_stmt.catalog == "flock_storage." ? "" : "flock_storage.", - update_stmt.model_name); + con.Query(duckdb_fmt::format("INSERT INTO {}flock_config.FLOCKMTL_MODEL_USER_DEFINED_INTERNAL_TABLE " + "(model_name, model, provider_name, model_args) " + "SELECT model_name, model, provider_name, model_args " + "FROM {}flock_config.FLOCKMTL_MODEL_USER_DEFINED_INTERNAL_TABLE " + "WHERE model_name = '{}'; ", + update_stmt.catalog, + update_stmt.catalog == "flock_storage." ? "" : "flock_storage.", + update_stmt.model_name)); + + con.Query(duckdb_fmt::format("DELETE FROM {}flock_config.FLOCKMTL_MODEL_USER_DEFINED_INTERNAL_TABLE " + "WHERE model_name = '{}'; ", + update_stmt.catalog == "flock_storage." ? 
"" : "flock_storage.", + update_stmt.model_name)); + + return std::string("SELECT 'Model scope updated successfully' AS status"); + }, + false); break; } case StatementType::GET_MODEL: { const auto& get_stmt = static_cast(statement); - query = duckdb_fmt::format("SELECT 'global' AS scope, * " + query = ExecuteGetQuery( + duckdb_fmt::format("SELECT 'global' AS scope, * " "FROM flock_storage.flock_config.FLOCKMTL_MODEL_DEFAULT_INTERNAL_TABLE " "WHERE model_name = '{}' " "UNION ALL " @@ -408,20 +428,22 @@ std::string ModelParser::ToSQL(const QueryStatement& statement) const { "SELECT 'local' AS scope, * " "FROM flock_config.FLOCKMTL_MODEL_USER_DEFINED_INTERNAL_TABLE " "WHERE model_name = '{}';", - get_stmt.model_name, get_stmt.model_name, get_stmt.model_name, get_stmt.model_name); + get_stmt.model_name, get_stmt.model_name, get_stmt.model_name, get_stmt.model_name), + true); break; } case StatementType::GET_ALL_MODEL: { - query = duckdb_fmt::format(" SELECT 'global' AS scope, * " - " FROM flock_storage.flock_config.FLOCKMTL_MODEL_DEFAULT_INTERNAL_TABLE" - " UNION ALL " - " SELECT 'global' AS scope, * " - " FROM flock_storage.flock_config.FLOCKMTL_MODEL_USER_DEFINED_INTERNAL_TABLE" - " UNION ALL " - " SELECT 'local' AS scope, * " - " FROM flock_config.FLOCKMTL_MODEL_USER_DEFINED_INTERNAL_TABLE;", - Config::get_global_storage_path().string()); + query = ExecuteGetQuery( + " SELECT 'global' AS scope, * " + " FROM flock_storage.flock_config.FLOCKMTL_MODEL_DEFAULT_INTERNAL_TABLE" + " UNION ALL " + " SELECT 'global' AS scope, * " + " FROM flock_storage.flock_config.FLOCKMTL_MODEL_USER_DEFINED_INTERNAL_TABLE" + " UNION ALL " + " SELECT 'local' AS scope, * " + " FROM flock_config.FLOCKMTL_MODEL_USER_DEFINED_INTERNAL_TABLE;", + true); break; } default: diff --git a/src/custom_parser/query/prompt_parser.cpp b/src/custom_parser/query/prompt_parser.cpp index 1150938f..7f4bcfa5 100644 --- a/src/custom_parser/query/prompt_parser.cpp +++ b/src/custom_parser/query/prompt_parser.cpp 
@@ -2,6 +2,7 @@ #include "flock/core/common.hpp" #include "flock/core/config.hpp" +#include "flock/custom_parser/query_parser.hpp" #include #include @@ -216,89 +217,110 @@ std::string PromptParser::ToSQL(const QueryStatement& statement) const { switch (statement.type) { case StatementType::CREATE_PROMPT: { const auto& create_stmt = static_cast(statement); - auto con = Config::GetConnection(); - auto result = con.Query(duckdb_fmt::format(" SELECT prompt_name " - " FROM {}flock_config.FLOCKMTL_PROMPT_INTERNAL_TABLE" - " WHERE prompt_name = '{}';", - create_stmt.catalog.empty() ? "flock_storage." : "", - create_stmt.prompt_name)); - if (result->RowCount() != 0) { - throw std::runtime_error(duckdb_fmt::format("Prompt '{}' already exist.", create_stmt.prompt_name)); - } - query = duckdb_fmt::format(" INSERT INTO {}flock_config.FLOCKMTL_PROMPT_INTERNAL_TABLE " - " (prompt_name, prompt) " - " VALUES ('{}', '{}'); ", - create_stmt.catalog, create_stmt.prompt_name, create_stmt.prompt); + query = ExecuteQueryWithStorage([&create_stmt](duckdb::Connection& con) { + auto result = con.Query(duckdb_fmt::format(" SELECT prompt_name " + " FROM {}flock_config.FLOCKMTL_PROMPT_INTERNAL_TABLE" + " WHERE prompt_name = '{}';", + create_stmt.catalog.empty() ? "flock_storage." 
: "", + create_stmt.prompt_name)); + + auto& materialized_result = result->Cast(); + if (materialized_result.RowCount() != 0) { + throw std::runtime_error(duckdb_fmt::format("Prompt '{}' already exist.", create_stmt.prompt_name)); + } + + auto insert_query = duckdb_fmt::format(" INSERT INTO {}flock_config.FLOCKMTL_PROMPT_INTERNAL_TABLE " + " (prompt_name, prompt) " + " VALUES ('{}', '{}'); ", + create_stmt.catalog, create_stmt.prompt_name, create_stmt.prompt); + con.Query(insert_query); + + return std::string("SELECT 'Prompt created successfully' AS status"); + }, + false); break; } case StatementType::DELETE_PROMPT: { const auto& delete_stmt = static_cast(statement); - auto con = Config::GetConnection(); - auto result = con.Query(duckdb_fmt::format(" DELETE FROM flock_config.FLOCKMTL_PROMPT_INTERNAL_TABLE " - " WHERE prompt_name = '{}'; ", - delete_stmt.prompt_name)); - - query = duckdb_fmt::format(" DELETE FROM flock_storage.flock_config.FLOCKMTL_PROMPT_INTERNAL_TABLE " + query = ExecuteSetQuery( + duckdb_fmt::format(" DELETE FROM flock_config.FLOCKMTL_PROMPT_INTERNAL_TABLE " + " WHERE prompt_name = '{}'; " + " DELETE FROM flock_storage.flock_config.FLOCKMTL_PROMPT_INTERNAL_TABLE " " WHERE prompt_name = '{}'; ", - delete_stmt.prompt_name); + delete_stmt.prompt_name, delete_stmt.prompt_name), + "Prompt deleted successfully", + false); break; } case StatementType::UPDATE_PROMPT: { const auto& update_stmt = static_cast(statement); - auto con = Config::GetConnection(); - auto result = - con.Query(duckdb_fmt::format(" SELECT version, 'local' AS scope " - " FROM flock_config.FLOCKMTL_PROMPT_INTERNAL_TABLE" - " WHERE prompt_name = '{}'" - " UNION ALL " - " SELECT version, 'global' AS scope " - " FROM flock_storage.flock_config.FLOCKMTL_PROMPT_INTERNAL_TABLE" - " WHERE prompt_name = '{}' " - " ORDER BY version DESC;", - update_stmt.prompt_name, update_stmt.prompt_name)); - if (result->RowCount() == 0) { - throw std::runtime_error(duckdb_fmt::format("Prompt '{}' doesn't 
exist.", update_stmt.prompt_name)); - } - - int version = result->GetValue(0, 0) + 1; - auto catalog = result->GetValue(1, 0).ToString() == "global" ? "flock_storage." : ""; - query = duckdb_fmt::format(" INSERT INTO {}flock_config.FLOCKMTL_PROMPT_INTERNAL_TABLE " - " (prompt_name, prompt, version) " - " VALUES ('{}', '{}', {}); ", - catalog, update_stmt.prompt_name, update_stmt.new_prompt, version); + query = ExecuteQueryWithStorage([&update_stmt](duckdb::Connection& con) { + auto result = con.Query(duckdb_fmt::format(" SELECT version, 'local' AS scope " + " FROM flock_config.FLOCKMTL_PROMPT_INTERNAL_TABLE" + " WHERE prompt_name = '{}'" + " UNION ALL " + " SELECT version, 'global' AS scope " + " FROM flock_storage.flock_config.FLOCKMTL_PROMPT_INTERNAL_TABLE" + " WHERE prompt_name = '{}' " + " ORDER BY version DESC;", + update_stmt.prompt_name, update_stmt.prompt_name)); + + auto& materialized_result = result->Cast(); + if (materialized_result.RowCount() == 0) { + throw std::runtime_error(duckdb_fmt::format("Prompt '{}' doesn't exist.", update_stmt.prompt_name)); + } + + int version = materialized_result.GetValue(0, 0) + 1; + auto catalog = materialized_result.GetValue(1, 0).ToString() == "global" ? "flock_storage." 
: ""; + + con.Query(duckdb_fmt::format(" INSERT INTO {}flock_config.FLOCKMTL_PROMPT_INTERNAL_TABLE " + " (prompt_name, prompt, version) " + " VALUES ('{}', '{}', {}); ", + catalog, update_stmt.prompt_name, update_stmt.new_prompt, version)); + + return std::string("SELECT 'Prompt updated successfully' AS status"); + }, + false); break; } case StatementType::UPDATE_PROMPT_SCOPE: { const auto& update_stmt = static_cast(statement); - auto con = Config::GetConnection(); - auto result = con.Query(duckdb_fmt::format(" SELECT prompt_name " - " FROM {}flock_config.FLOCKMTL_PROMPT_INTERNAL_TABLE" - " WHERE prompt_name = '{}';", - update_stmt.catalog, update_stmt.prompt_name)); - if (result->RowCount() != 0) { - throw std::runtime_error( - duckdb_fmt::format("Model '{}' already exist in {} storage.", update_stmt.prompt_name, - update_stmt.catalog == "flock_storage." ? "global" : "local")); - } - - con.Query(duckdb_fmt::format("INSERT INTO {}flock_config.FLOCKMTL_PROMPT_INTERNAL_TABLE " - "(prompt_name, prompt, updated_at, version) " - "SELECT prompt_name, prompt, updated_at, version " - "FROM {}flock_config.FLOCKMTL_PROMPT_INTERNAL_TABLE " - "WHERE prompt_name = '{}';", - update_stmt.catalog, - update_stmt.catalog == "flock_storage." ? "" : "flock_storage.", - update_stmt.prompt_name)); - - query = duckdb_fmt::format("DELETE FROM {}flock_config.FLOCKMTL_PROMPT_INTERNAL_TABLE " - "WHERE prompt_name = '{}'; ", - update_stmt.catalog == "flock_storage." ? 
"" : "flock_storage.", - update_stmt.prompt_name); + query = ExecuteQueryWithStorage([&update_stmt](duckdb::Connection& con) { + auto result = con.Query(duckdb_fmt::format(" SELECT prompt_name " + " FROM {}flock_config.FLOCKMTL_PROMPT_INTERNAL_TABLE" + " WHERE prompt_name = '{}';", + update_stmt.catalog, update_stmt.prompt_name)); + + auto& materialized_result = result->Cast(); + if (materialized_result.RowCount() != 0) { + throw std::runtime_error( + duckdb_fmt::format("Prompt '{}' already exist in {} storage.", update_stmt.prompt_name, + update_stmt.catalog == "flock_storage." ? "global" : "local")); + } + + con.Query(duckdb_fmt::format("INSERT INTO {}flock_config.FLOCKMTL_PROMPT_INTERNAL_TABLE " + "(prompt_name, prompt, updated_at, version) " + "SELECT prompt_name, prompt, updated_at, version " + "FROM {}flock_config.FLOCKMTL_PROMPT_INTERNAL_TABLE " + "WHERE prompt_name = '{}';", + update_stmt.catalog, + update_stmt.catalog == "flock_storage." ? "" : "flock_storage.", + update_stmt.prompt_name)); + + con.Query(duckdb_fmt::format("DELETE FROM {}flock_config.FLOCKMTL_PROMPT_INTERNAL_TABLE " + "WHERE prompt_name = '{}'; ", + update_stmt.catalog == "flock_storage." ? 
"" : "flock_storage.", + update_stmt.prompt_name)); + + return std::string("SELECT 'Prompt scope updated successfully' AS status"); + }, + false); break; } case StatementType::GET_PROMPT: { const auto& get_stmt = static_cast(statement); - query = duckdb_fmt::format("SELECT 'global' AS scope, * " + query = ExecuteGetQuery( + duckdb_fmt::format("SELECT 'global' AS scope, * " "FROM flock_storage.flock_config.FLOCKMTL_PROMPT_INTERNAL_TABLE " "WHERE prompt_name = '{}' " "UNION ALL " @@ -306,12 +328,13 @@ std::string PromptParser::ToSQL(const QueryStatement& statement) const { "FROM flock_config.FLOCKMTL_PROMPT_INTERNAL_TABLE " "WHERE prompt_name = '{}' " "ORDER BY version DESC;", - get_stmt.prompt_name, get_stmt.prompt_name); - + get_stmt.prompt_name, get_stmt.prompt_name), + true); break; } case StatementType::GET_ALL_PROMPT: { - query = " SELECT 'global' as scope, t1.* " + query = ExecuteGetQuery( + " SELECT 'global' as scope, t1.* " " FROM flock_storage.flock_config.FLOCKMTL_PROMPT_INTERNAL_TABLE AS t1 " " JOIN (SELECT prompt_name, MAX(version) AS max_version " " FROM flock_storage.flock_config.FLOCKMTL_PROMPT_INTERNAL_TABLE " @@ -325,7 +348,8 @@ std::string PromptParser::ToSQL(const QueryStatement& statement) const { " FROM flock_config.FLOCKMTL_PROMPT_INTERNAL_TABLE " " GROUP BY prompt_name) AS t2 " " ON t1.prompt_name = t2.prompt_name " - " AND t1.version = t2.max_version; "; + " AND t1.version = t2.max_version; ", + true); break; } default: diff --git a/src/custom_parser/query_parser.cpp b/src/custom_parser/query_parser.cpp index 5eae8d6f..b8f766c9 100644 --- a/src/custom_parser/query_parser.cpp +++ b/src/custom_parser/query_parser.cpp @@ -1,12 +1,102 @@ #include "flock/custom_parser/query_parser.hpp" +#include "duckdb/main/materialized_query_result.hpp" #include "flock/core/common.hpp" +#include "flock/core/config.hpp" #include #include namespace flock { +// Format a DuckDB value for SQL (escape strings, handle NULLs) +std::string FormatValueForSQL(const 
duckdb::Value& value) { + if (value.IsNull()) { + return "NULL"; + } + auto str = value.ToString(); + // Escape single quotes by doubling them + std::string escaped; + escaped.reserve(str.length() + 10); + for (char c: str) { + if (c == '\'') { + escaped += "''"; + } else { + escaped += c; + } + } + return "'" + escaped + "'"; +} + +// Format query results as VALUES clause: SELECT * FROM VALUES (...) +std::string FormatResultsAsValues(duckdb::unique_ptr result) { + if (!result) { + return "SELECT * FROM (VALUES (NULL)) AS empty_result WHERE FALSE"; + } + + // Cast to MaterializedQueryResult to access GetValue and RowCount + auto& materialized_result = result->Cast(); + + if (materialized_result.RowCount() == 0) { + return "SELECT * FROM (VALUES (NULL)) AS empty_result WHERE FALSE"; + } + + std::ostringstream values_stream; + auto column_count = result->ColumnCount(); + + // Get column names + std::vector column_names; + column_names.reserve(column_count); + for (idx_t col = 0; col < column_count; col++) { + column_names.push_back(result->ColumnName(col)); + } + + // Format each row as VALUES tuple + for (idx_t row = 0; row < materialized_result.RowCount(); row++) { + if (row > 0) { + values_stream << ", "; + } + values_stream << "("; + for (idx_t col = 0; col < column_count; col++) { + if (col > 0) { + values_stream << ", "; + } + auto value = materialized_result.GetValue(col, row); + values_stream << FormatValueForSQL(value); + } + values_stream << ")"; + } + + // Build column names for the VALUES clause + std::ostringstream column_names_stream; + for (size_t i = 0; i < column_names.size(); i++) { + if (i > 0) { + column_names_stream << ", "; + } + column_names_stream << "\"" << column_names[i] << "\""; + } + + return duckdb_fmt::format("SELECT * FROM (VALUES {}) AS result({})", + values_stream.str(), column_names_stream.str()); +} + +// Execute a query with storage attachment and return formatted result for GET operations +std::string ExecuteGetQuery(const 
std::string& query, bool read_only) { + auto con = Config::GetConnection(); + Config::StorageAttachmentGuard guard(con, read_only); + auto result = con.Query(query); + return FormatResultsAsValues(std::move(result)); +} + +// Execute a query with storage attachment and return status message for SET operations +std::string ExecuteSetQuery(const std::string& query, const std::string& success_message, bool read_only) { + auto con = Config::GetConnection(); + Config::StorageAttachmentGuard guard(con, read_only); + con.Query(query); + return duckdb_fmt::format("SELECT '{}' AS status", success_message); +} + + std::string QueryParser::ParseQuery(const std::string& query) { Tokenizer tokenizer(query); diff --git a/src/include/flock/core/config.hpp b/src/include/flock/core/config.hpp index 1f402d35..88ce1581 100644 --- a/src/include/flock/core/config.hpp +++ b/src/include/flock/core/config.hpp @@ -26,6 +26,29 @@ class Config { static std::string get_default_models_table_name(); static std::string get_user_defined_models_table_name(); static std::string get_prompts_table_name(); + static void AttachToGlobalStorage(duckdb::Connection& con, bool read_only = true); + static void DetachFromGlobalStorage(duckdb::Connection& con); + + class StorageAttachmentGuard { + public: + StorageAttachmentGuard(duckdb::Connection& con, bool read_only = true); + ~StorageAttachmentGuard(); + + StorageAttachmentGuard(const StorageAttachmentGuard&) = delete; + StorageAttachmentGuard& operator=(const StorageAttachmentGuard&) = delete; + StorageAttachmentGuard(StorageAttachmentGuard&&) = delete; + StorageAttachmentGuard& operator=(StorageAttachmentGuard&&) = delete; + + private: + duckdb::Connection& connection; + bool attached; + static constexpr int MAX_RETRIES = 10; + static constexpr int RETRY_DELAY_MS = 1000; + + bool TryAttach(bool read_only); + bool TryDetach(); + void Wait(int milliseconds); + }; private: static void SetupGlobalStorageLocation(); diff --git 
a/src/include/flock/custom_parser/query_parser.hpp b/src/include/flock/custom_parser/query_parser.hpp index 3b4b1e25..08d57952 100644 --- a/src/include/flock/custom_parser/query_parser.hpp +++ b/src/include/flock/custom_parser/query_parser.hpp @@ -1,6 +1,7 @@ #pragma once #include "flock/core/common.hpp" +#include "flock/core/config.hpp" #include "flock/custom_parser/query/model_parser.hpp" #include "flock/custom_parser/query/prompt_parser.hpp" #include "flock/custom_parser/query_statements.hpp" @@ -10,6 +11,20 @@ namespace flock { +// Forward declarations for query execution utilities +std::string FormatValueForSQL(const duckdb::Value& value); +std::string FormatResultsAsValues(duckdb::unique_ptr result); +std::string ExecuteGetQuery(const std::string& query, bool read_only); +std::string ExecuteSetQuery(const std::string& query, const std::string& success_message, bool read_only); + +// Template function for executing queries with storage attachment +template +std::string ExecuteQueryWithStorage(Func&& query_func, bool read_only) { + auto con = Config::GetConnection(); + Config::StorageAttachmentGuard guard(con, read_only); + return query_func(con); +} + class QueryParser { public: std::string ParseQuery(const std::string& query); diff --git a/src/model_manager/model.cpp b/src/model_manager/model.cpp index 3a150618..f426f891 100644 --- a/src/model_manager/model.cpp +++ b/src/model_manager/model.cpp @@ -54,6 +54,7 @@ std::tuple> Model::GetQueriedMo model_name, model_name); auto con = Config::GetConnection(); + Config::StorageAttachmentGuard guard(con, true); auto query_result = con.Query(query); if (query_result->RowCount() == 0) { diff --git a/src/prompt_manager/prompt_manager.cpp b/src/prompt_manager/prompt_manager.cpp index 8a4ff171..270b3c76 100644 --- a/src/prompt_manager/prompt_manager.cpp +++ b/src/prompt_manager/prompt_manager.cpp @@ -214,6 +214,7 @@ PromptDetails PromptManager::CreatePromptDetails(const nlohmann::json& prompt_de version_where_clause, 
order_by_clause); error_message = duckdb_fmt::format("The provided `{}` prompt " + error_message, prompt_details.prompt_name); auto con = Config::GetConnection(); + Config::StorageAttachmentGuard guard(con, true); const auto query_result = con.Query(prompt_details_query); if (query_result->RowCount() == 0) { throw std::runtime_error(error_message); From a7f7dac41e5843eef52522c0f6e66d45054f5389 Mon Sep 17 00:00:00 2001 From: Anas Dorbani Date: Tue, 16 Dec 2025 14:40:45 -0500 Subject: [PATCH 54/59] Add LlmFunctionBindData structure and input parsing utilities --- src/core/config/config.cpp | 1 - src/functions/input_parser.cpp | 32 +++++++++++++++++++ src/include/flock/core/common.hpp | 2 ++ src/include/flock/functions/input_parser.hpp | 1 + .../functions/llm_function_bind_data.hpp | 32 +++++++++++++++++++ 5 files changed, 67 insertions(+), 1 deletion(-) create mode 100644 src/include/flock/functions/llm_function_bind_data.hpp diff --git a/src/core/config/config.cpp b/src/core/config/config.cpp index e36eac90..2cab60aa 100644 --- a/src/core/config/config.cpp +++ b/src/core/config/config.cpp @@ -1,7 +1,6 @@ #include "flock/core/config.hpp" #include "filesystem.hpp" #include "flock/secret_manager/secret_manager.hpp" -#include #include namespace flock { diff --git a/src/functions/input_parser.cpp b/src/functions/input_parser.cpp index e5e304da..09d49b91 100644 --- a/src/functions/input_parser.cpp +++ b/src/functions/input_parser.cpp @@ -90,4 +90,36 @@ nlohmann::json CastVectorOfStructsToJson(const duckdb::Vector& struct_vector, co return struct_json; } +nlohmann::json CastValueToJson(const duckdb::Value& value) { + nlohmann::json result; + + if (value.IsNull()) { + return result; + } + + auto& value_type = value.type(); + if (value_type.id() == duckdb::LogicalTypeId::STRUCT) { + auto& children = duckdb::StructValue::GetChildren(value); + auto child_count = duckdb::StructType::GetChildCount(value_type); + + for (idx_t i = 0; i < child_count; i++) { + auto key = 
duckdb::StructType::GetChildName(value_type, i); + auto& child_value = children[i]; + + if (!child_value.IsNull()) { + // Recursively convert child values + if (child_value.type().id() == duckdb::LogicalTypeId::STRUCT) { + result[key] = CastValueToJson(child_value); + } else if (child_value.type().id() == duckdb::LogicalTypeId::INTEGER) { + result[key] = child_value.GetValue(); + } else { + result[key] = child_value.ToString(); + } + } + } + } + + return result; +} + }// namespace flock diff --git a/src/include/flock/core/common.hpp b/src/include/flock/core/common.hpp index 7c68ce6f..b9be1f50 100644 --- a/src/include/flock/core/common.hpp +++ b/src/include/flock/core/common.hpp @@ -7,8 +7,10 @@ #include "duckdb/parser/parsed_data/create_scalar_function_info.hpp" // Common standard library includes +#include #include #include +#include #include #include #include diff --git a/src/include/flock/functions/input_parser.hpp b/src/include/flock/functions/input_parser.hpp index bc851700..04b848a3 100644 --- a/src/include/flock/functions/input_parser.hpp +++ b/src/include/flock/functions/input_parser.hpp @@ -7,5 +7,6 @@ namespace flock { nlohmann::json CastVectorOfStructsToJson(const duckdb::Vector& struct_vector, int size); +nlohmann::json CastValueToJson(const duckdb::Value& value); }// namespace flock diff --git a/src/include/flock/functions/llm_function_bind_data.hpp b/src/include/flock/functions/llm_function_bind_data.hpp new file mode 100644 index 00000000..9c96f4b9 --- /dev/null +++ b/src/include/flock/functions/llm_function_bind_data.hpp @@ -0,0 +1,32 @@ +#pragma once + +#include "flock/core/common.hpp" +#include "flock/model_manager/model.hpp" + +namespace flock { + +struct LlmFunctionBindData : public duckdb::FunctionData { + nlohmann::json model_json;// Store model JSON to create fresh Model instances per call + std::string prompt; + + LlmFunctionBindData() = default; + + // Create a fresh Model instance (thread-safe, each call gets its own provider) + Model 
CreateModel() const { + return Model(model_json); + } + + duckdb::unique_ptr Copy() const override { + auto result = duckdb::make_uniq(); + result->model_json = model_json; + result->prompt = prompt; + return std::move(result); + } + + bool Equals(const duckdb::FunctionData& other) const override { + auto& other_bind = other.Cast(); + return prompt == other_bind.prompt && model_json == other_bind.model_json; + } +}; + +}// namespace flock From c0d644f99935e1218887afbb39c5f5a7ec4bd371 Mon Sep 17 00:00:00 2001 From: Anas Dorbani Date: Tue, 16 Dec 2025 14:41:00 -0500 Subject: [PATCH 55/59] Add ResolveModelDetailsToJson and mock provider factory to model manager --- src/include/flock/model_manager/model.hpp | 17 +++ .../providers/handlers/base_handler.hpp | 1 - src/model_manager/model.cpp | 117 +++++++++++++++--- 3 files changed, 115 insertions(+), 20 deletions(-) diff --git a/src/include/flock/model_manager/model.hpp b/src/include/flock/model_manager/model.hpp index fc17acd0..7dcc63f0 100644 --- a/src/include/flock/model_manager/model.hpp +++ b/src/include/flock/model_manager/model.hpp @@ -1,6 +1,7 @@ #pragma once #include "fmt/format.h" +#include #include #include "duckdb/main/connection.hpp" @@ -31,12 +32,27 @@ class Model { std::vector CollectEmbeddings(const std::string& contentType = "application/json"); std::vector CollectTranscriptions(const std::string& contentType = "multipart/form-data"); ModelDetails GetModelDetails(); + nlohmann::json GetModelDetailsAsJson() const; + // Static helper method for binders to resolve model details to JSON + static nlohmann::json ResolveModelDetailsToJson(const nlohmann::json& user_model_json); + + // Factory function type for creating mock providers + using MockProviderFactory = std::function()>; + + // Set a factory to create fresh mock providers (each Model gets its own instance) + static void SetMockProviderFactory(MockProviderFactory factory) { + mock_provider_factory_ = std::move(factory); + } + + // Legacy: Set a shared 
mock provider (for backward compatibility - less safe for parallel tests) static void SetMockProvider(const std::shared_ptr& mock_provider) { mock_provider_ = mock_provider; } + static void ResetMockProvider() { mock_provider_ = nullptr; + mock_provider_factory_ = nullptr; } std::shared_ptr @@ -45,6 +61,7 @@ class Model { private: ModelDetails model_details_; inline static std::shared_ptr mock_provider_ = nullptr; + inline static MockProviderFactory mock_provider_factory_ = nullptr; void ConstructProvider(); void LoadModelDetails(const nlohmann::json& model_json); static std::tuple> GetQueriedModel(const std::string& model_name); diff --git a/src/include/flock/model_manager/providers/handlers/base_handler.hpp b/src/include/flock/model_manager/providers/handlers/base_handler.hpp index 59f48478..d4ba5f53 100644 --- a/src/include/flock/model_manager/providers/handlers/base_handler.hpp +++ b/src/include/flock/model_manager/providers/handlers/base_handler.hpp @@ -4,7 +4,6 @@ #include "flock/metrics/manager.hpp" #include "flock/model_manager/providers/handlers/handler.hpp" #include "session.hpp" -#include #include #include #include diff --git a/src/model_manager/model.cpp b/src/model_manager/model.cpp index f426f891..8da8d3b4 100644 --- a/src/model_manager/model.cpp +++ b/src/model_manager/model.cpp @@ -1,9 +1,13 @@ #include "flock/model_manager/model.hpp" #include "flock/secret_manager/secret_manager.hpp" +#include +#include +#include +#include +#include namespace flock { -// Regular expression to match a valid Base64 string const std::regex base64_regex(R"(^[A-Za-z0-9+/=]+$)"); bool is_base64(const std::string& str) { @@ -20,26 +24,72 @@ void Model::LoadModelDetails(const nlohmann::json& model_json) { if (model_details_.model_name.empty()) { throw std::invalid_argument("`model_name` is required in model settings"); } - auto query_result = GetQueriedModel(model_details_.model_name); - model_details_.model = - model_json.contains("model") ? 
model_json.at("model").get() : std::get<0>(query_result); - model_details_.provider_name = - model_json.contains("provider") ? model_json.at("provider").get() : std::get<1>(query_result); - auto secret_name = "__default_" + model_details_.provider_name; - if (model_details_.provider_name == AZURE) - secret_name += "_llm"; - if (model_json.contains("secret_name")) { - secret_name = model_json["secret_name"].get(); - } - model_details_.secret = SecretManager::GetSecret(secret_name); - model_details_.model_parameters = model_json.contains("model_parameters") ? nlohmann::json::parse(model_json.at("model_parameters").get()) : std::get<2>(query_result)["model_parameters"]; - model_details_.tuple_format = - model_json.contains("tuple_format") ? model_json.at("tuple_format").get() : std::get<2>(query_result).contains("tuple_format") ? std::get<2>(query_result).at("tuple_format").get() - : "XML"; + bool has_resolved_details = model_json.contains("model") && + model_json.contains("provider") && + model_json.contains("secret") && + model_json.contains("tuple_format") && + model_json.contains("batch_size"); + + nlohmann::json db_model_args; + + if (has_resolved_details) { + model_details_.model = model_json.at("model").get(); + model_details_.provider_name = model_json.at("provider").get(); + model_details_.secret = model_json["secret"].get>(); + model_details_.tuple_format = model_json.at("tuple_format").get(); + model_details_.batch_size = model_json.at("batch_size").get(); + + if (model_json.contains("model_parameters")) { + auto& mp = model_json.at("model_parameters"); + model_details_.model_parameters = mp.is_string() ? nlohmann::json::parse(mp.get()) : mp; + } else { + model_details_.model_parameters = nlohmann::json::object(); + } + } else { + auto [db_model, db_provider, db_args] = GetQueriedModel(model_details_.model_name); + model_details_.model = model_json.contains("model") ? 
model_json.at("model").get() : db_model; + model_details_.provider_name = model_json.contains("provider") ? model_json.at("provider").get() : db_provider; + db_model_args = db_args; + + if (model_json.contains("secret")) { + model_details_.secret = model_json["secret"].get>(); + } else { + auto secret_name = "__default_" + model_details_.provider_name; + if (model_details_.provider_name == AZURE) { + secret_name += "_llm"; + } + if (model_json.contains("secret_name")) { + secret_name = model_json["secret_name"].get(); + } + model_details_.secret = SecretManager::GetSecret(secret_name); + } + + if (model_json.contains("model_parameters")) { + auto& mp = model_json.at("model_parameters"); + model_details_.model_parameters = mp.is_string() ? nlohmann::json::parse(mp.get()) : mp; + } else if (db_model_args.contains("model_parameters")) { + model_details_.model_parameters = db_model_args["model_parameters"]; + } else { + model_details_.model_parameters = nlohmann::json::object(); + } + + if (model_json.contains("tuple_format")) { + model_details_.tuple_format = model_json.at("tuple_format").get(); + } else if (db_model_args.contains("tuple_format")) { + model_details_.tuple_format = db_model_args.at("tuple_format").get(); + } else { + model_details_.tuple_format = "XML"; + } - model_details_.batch_size = model_json.contains("batch_size") ? model_json.at("batch_size").get() : std::get<2>(query_result).contains("batch_size") ? 
std::get<2>(query_result).at("batch_size").get() - : 2048; + if (model_json.contains("batch_size")) { + model_details_.batch_size = model_json.at("batch_size").get(); + } else if (db_model_args.contains("batch_size")) { + model_details_.batch_size = db_model_args.at("batch_size").get(); + } else { + model_details_.batch_size = 2048; + } + } } std::tuple> Model::GetQueriedModel(const std::string& model_name) { @@ -77,6 +127,10 @@ std::tuple> Model::GetQueriedMo } void Model::ConstructProvider() { + if (mock_provider_factory_) { + provider_ = mock_provider_factory_(); + return; + } if (mock_provider_) { provider_ = mock_provider_; return; @@ -99,6 +153,31 @@ void Model::ConstructProvider() { ModelDetails Model::GetModelDetails() { return model_details_; } +nlohmann::json Model::GetModelDetailsAsJson() const { + nlohmann::json result; + result["model_name"] = model_details_.model_name; + result["model"] = model_details_.model; + result["provider"] = model_details_.provider_name; + result["tuple_format"] = model_details_.tuple_format; + result["batch_size"] = model_details_.batch_size; + result["secret"] = model_details_.secret; + if (!model_details_.model_parameters.empty()) { + result["model_parameters"] = model_details_.model_parameters; + } + return result; +} + +nlohmann::json Model::ResolveModelDetailsToJson(const nlohmann::json& user_model_json) { + Model temp_model(user_model_json); + auto resolved_json = temp_model.GetModelDetailsAsJson(); + + if (user_model_json.contains("secret_name")) { + resolved_json["secret_name"] = user_model_json["secret_name"]; + } + + return resolved_json; +} + void Model::AddCompletionRequest(const std::string& prompt, const int num_output_tuples, OutputType output_type, const nlohmann::json& media_data) { provider_->AddCompletionRequest(prompt, num_output_tuples, output_type, media_data); } From 1b17b8c5117379de18a84c562da5d85ae5c0fd65 Mon Sep 17 00:00:00 2001 From: Anas Dorbani Date: Tue, 16 Dec 2025 14:41:14 -0500 Subject: [PATCH 
56/59] Refactor scalar functions to use LlmFunctionBindData structure --- .../scalar/llm_complete/implementation.cpp | 36 +++-- .../scalar/llm_complete/registry.cpp | 3 +- .../scalar/llm_embedding/implementation.cpp | 27 ++-- .../scalar/llm_embedding/registry.cpp | 5 +- .../scalar/llm_filter/implementation.cpp | 43 +++--- src/functions/scalar/llm_filter/registry.cpp | 3 +- src/functions/scalar/scalar.cpp | 129 ++++++++++++++++++ .../flock/functions/scalar/llm_complete.hpp | 7 +- .../flock/functions/scalar/llm_embedding.hpp | 7 +- .../flock/functions/scalar/llm_filter.hpp | 7 +- src/include/flock/functions/scalar/scalar.hpp | 34 +++++ 11 files changed, 249 insertions(+), 52 deletions(-) diff --git a/src/functions/scalar/llm_complete/implementation.cpp b/src/functions/scalar/llm_complete/implementation.cpp index 5055d64e..83f60f14 100644 --- a/src/functions/scalar/llm_complete/implementation.cpp +++ b/src/functions/scalar/llm_complete/implementation.cpp @@ -1,10 +1,20 @@ +#include "duckdb/planner/expression/bound_function_expression.hpp" #include "flock/functions/scalar/llm_complete.hpp" +#include "flock/functions/scalar/scalar.hpp" #include "flock/metrics/manager.hpp" +#include "flock/model_manager/model.hpp" -#include namespace flock { +duckdb::unique_ptr LlmComplete::Bind( + duckdb::ClientContext& context, + duckdb::ScalarFunction& bound_function, + duckdb::vector>& arguments) { + return ScalarFunctionBase::ValidateAndInitializeBindData(context, arguments, "llm_complete", false); +} + + void LlmComplete::ValidateArguments(duckdb::DataChunk& args) { if (args.ColumnCount() < 2 || args.ColumnCount() > 3) { throw std::runtime_error("Invalid number of arguments."); @@ -24,12 +34,9 @@ void LlmComplete::ValidateArguments(duckdb::DataChunk& args) { } } -std::vector LlmComplete::Operation(duckdb::DataChunk& args) { - // LlmComplete::ValidateArguments(args); - auto model_details_json = CastVectorOfStructsToJson(args.data[0], 1); - Model model(model_details_json); 
+std::vector LlmComplete::Operation(duckdb::DataChunk& args, LlmFunctionBindData* bind_data) { + Model model = bind_data->CreateModel(); - // Set model name and provider in metrics (context is already set in Execute) auto model_details = model.GetModelDetails(); MetricsManager::SetModelInfo(model_details.model_name, model_details.provider_name); @@ -37,13 +44,13 @@ std::vector LlmComplete::Operation(duckdb::DataChunk& args) { auto context_columns = nlohmann::json::array(); if (prompt_context_json.contains("context_columns")) { context_columns = prompt_context_json["context_columns"]; - prompt_context_json.erase("context_columns"); } - auto prompt_details = PromptManager::CreatePromptDetails(prompt_context_json); + + auto prompt = bind_data->prompt; std::vector results; if (context_columns.empty()) { - auto template_str = prompt_details.prompt; + auto template_str = prompt; model.AddCompletionRequest(template_str, 1, OutputType::STRING); auto response = model.CollectCompletions()[0]["items"][0]; if (response.is_string()) { @@ -56,7 +63,7 @@ std::vector LlmComplete::Operation(duckdb::DataChunk& args) { return results; } - auto responses = BatchAndComplete(context_columns, prompt_details.prompt, ScalarFunctionType::COMPLETE, model); + auto responses = BatchAndComplete(context_columns, prompt, ScalarFunctionType::COMPLETE, model); results.reserve(responses.size()); for (const auto& response: responses) { @@ -71,18 +78,18 @@ std::vector LlmComplete::Operation(duckdb::DataChunk& args) { } void LlmComplete::Execute(duckdb::DataChunk& args, duckdb::ExpressionState& state, duckdb::Vector& result) { - // Get database instance and generate unique ID for metrics auto& context = state.GetContext(); auto* db = context.db.get(); const void* invocation_id = MetricsManager::GenerateUniqueId(); - // Start metrics tracking MetricsManager::StartInvocation(db, invocation_id, FunctionType::LLM_COMPLETE); - // Start execution timing auto exec_start = 
std::chrono::high_resolution_clock::now(); - if (const auto results = LlmComplete::Operation(args); static_cast(results.size()) == 1) { + auto& func_expr = state.expr.Cast(); + auto* bind_data = &func_expr.bind_info->Cast(); + + if (const auto results = LlmComplete::Operation(args, bind_data); static_cast(results.size()) == 1) { auto empty_vec = duckdb::Vector(std::string()); duckdb::UnaryExecutor::Execute( empty_vec, result, args.size(), @@ -94,7 +101,6 @@ void LlmComplete::Execute(duckdb::DataChunk& args, duckdb::ExpressionState& stat } } - // End execution timing and update metrics auto exec_end = std::chrono::high_resolution_clock::now(); double exec_duration_ms = std::chrono::duration(exec_end - exec_start).count(); MetricsManager::AddExecutionTime(exec_duration_ms); diff --git a/src/functions/scalar/llm_complete/registry.cpp b/src/functions/scalar/llm_complete/registry.cpp index 492085d4..7c3544f8 100644 --- a/src/functions/scalar/llm_complete/registry.cpp +++ b/src/functions/scalar/llm_complete/registry.cpp @@ -6,7 +6,8 @@ namespace flock { void ScalarRegistry::RegisterLlmComplete(duckdb::ExtensionLoader& loader) { loader.RegisterFunction(duckdb::ScalarFunction("llm_complete", {duckdb::LogicalType::ANY, duckdb::LogicalType::ANY}, - duckdb::LogicalType::JSON(), LlmComplete::Execute)); + duckdb::LogicalType::JSON(), LlmComplete::Execute, + LlmComplete::Bind)); } }// namespace flock diff --git a/src/functions/scalar/llm_embedding/implementation.cpp b/src/functions/scalar/llm_embedding/implementation.cpp index fa30e428..de8b9b05 100644 --- a/src/functions/scalar/llm_embedding/implementation.cpp +++ b/src/functions/scalar/llm_embedding/implementation.cpp @@ -1,11 +1,19 @@ +#include "duckdb/planner/expression/bound_function_expression.hpp" #include "flock/core/config.hpp" #include "flock/functions/scalar/llm_embedding.hpp" #include "flock/metrics/manager.hpp" - -#include +#include "flock/model_manager/model.hpp" namespace flock { +duckdb::unique_ptr 
LlmEmbedding::Bind( + duckdb::ClientContext& context, + duckdb::ScalarFunction& bound_function, + duckdb::vector>& arguments) { + return ScalarFunctionBase::ValidateAndInitializeBindData(context, arguments, "llm_embedding", true, false); +} + + void LlmEmbedding::ValidateArguments(duckdb::DataChunk& args) { if (args.ColumnCount() < 2 || args.ColumnCount() > 2) { throw std::runtime_error("LlmEmbedScalarParser: Invalid number of arguments."); @@ -18,9 +26,7 @@ void LlmEmbedding::ValidateArguments(duckdb::DataChunk& args) { } } -std::vector> LlmEmbedding::Operation(duckdb::DataChunk& args) { - // LlmEmbedding::ValidateArguments(args); - +std::vector> LlmEmbedding::Operation(duckdb::DataChunk& args, LlmFunctionBindData* bind_data) { auto inputs = CastVectorOfStructsToJson(args.data[1], args.size()); for (const auto& item: inputs.items()) { if (item.key() != "context_columns") { @@ -33,10 +39,8 @@ std::vector> LlmEmbedding::Operation(duckdb::DataC } } - auto model_details_json = CastVectorOfStructsToJson(args.data[0], 1); - Model model(model_details_json); + Model model = bind_data->CreateModel(); - // Set model name and provider in metrics (context is already set in Execute) auto model_details = model.GetModelDetails(); MetricsManager::SetModelInfo(model_details.model_name, model_details.provider_name); @@ -79,17 +83,18 @@ std::vector> LlmEmbedding::Operation(duckdb::DataC } void LlmEmbedding::Execute(duckdb::DataChunk& args, duckdb::ExpressionState& state, duckdb::Vector& result) { - // Get database instance and generate unique ID for metrics auto& context = state.GetContext(); auto* db = context.db.get(); const void* invocation_id = MetricsManager::GenerateUniqueId(); - // Start metrics tracking MetricsManager::StartInvocation(db, invocation_id, FunctionType::LLM_EMBEDDING); auto exec_start = std::chrono::high_resolution_clock::now(); - auto results = LlmEmbedding::Operation(args); + auto& func_expr = state.expr.Cast(); + auto* bind_data = 
&func_expr.bind_info->Cast(); + + auto results = LlmEmbedding::Operation(args, bind_data); auto index = 0; for (const auto& res: results) { diff --git a/src/functions/scalar/llm_embedding/registry.cpp b/src/functions/scalar/llm_embedding/registry.cpp index eadba2fc..35d8829c 100644 --- a/src/functions/scalar/llm_embedding/registry.cpp +++ b/src/functions/scalar/llm_embedding/registry.cpp @@ -5,8 +5,9 @@ namespace flock { void ScalarRegistry::RegisterLlmEmbedding(duckdb::ExtensionLoader& loader) { loader.RegisterFunction( - duckdb::ScalarFunction("llm_embedding", {duckdb::LogicalType::ANY, duckdb::LogicalType::ANY}, duckdb::LogicalType::LIST(duckdb::LogicalType::DOUBLE), - LlmEmbedding::Execute)); + duckdb::ScalarFunction("llm_embedding", {duckdb::LogicalType::ANY, duckdb::LogicalType::ANY}, + duckdb::LogicalType::LIST(duckdb::LogicalType::DOUBLE), + LlmEmbedding::Execute, LlmEmbedding::Bind)); } }// namespace flock diff --git a/src/functions/scalar/llm_filter/implementation.cpp b/src/functions/scalar/llm_filter/implementation.cpp index 360073f6..e0a4419f 100644 --- a/src/functions/scalar/llm_filter/implementation.cpp +++ b/src/functions/scalar/llm_filter/implementation.cpp @@ -1,11 +1,20 @@ +#include "duckdb/planner/expression/bound_function_expression.hpp" #include "flock/core/config.hpp" #include "flock/functions/scalar/llm_filter.hpp" +#include "flock/functions/scalar/scalar.hpp" #include "flock/metrics/manager.hpp" - -#include +#include "flock/model_manager/model.hpp" namespace flock { +duckdb::unique_ptr LlmFilter::Bind( + duckdb::ClientContext& context, + duckdb::ScalarFunction& bound_function, + duckdb::vector>& arguments) { + return ScalarFunctionBase::ValidateAndInitializeBindData(context, arguments, "llm_filter", false); +} + + void LlmFilter::ValidateArguments(duckdb::DataChunk& args) { if (args.ColumnCount() < 2 || args.ColumnCount() > 3) { throw std::runtime_error("Invalid number of arguments."); @@ -23,13 +32,9 @@ void 
LlmFilter::ValidateArguments(duckdb::DataChunk& args) { } } -std::vector LlmFilter::Operation(duckdb::DataChunk& args) { - // LlmFilter::ValidateArguments(args); - - auto model_details_json = CastVectorOfStructsToJson(args.data[0], 1); - Model model(model_details_json); +std::vector LlmFilter::Operation(duckdb::DataChunk& args, LlmFunctionBindData* bind_data) { + Model model = bind_data->CreateModel(); - // Set model name and provider in metrics (context is already set in Execute) auto model_details = model.GetModelDetails(); MetricsManager::SetModelInfo(model_details.model_name, model_details.provider_name); @@ -37,23 +42,22 @@ std::vector LlmFilter::Operation(duckdb::DataChunk& args) { auto context_columns = nlohmann::json::array(); if (prompt_context_json.contains("context_columns")) { context_columns = prompt_context_json["context_columns"]; - prompt_context_json.erase("context_columns"); } - auto prompt_details = PromptManager::CreatePromptDetails(prompt_context_json); + + auto prompt = bind_data->prompt; std::vector results; if (context_columns.empty()) { - // Simple filter without per-row context. Ask once and return the boolean result. 
- model.AddCompletionRequest(prompt_details.prompt, 1, OutputType::BOOL); + auto template_str = prompt; + model.AddCompletionRequest(template_str, 1, OutputType::BOOL); auto response = model.CollectCompletions()[0]["items"][0]; - if (response.is_null()) { - results.emplace_back("true"); + results.push_back("true"); } else { - results.emplace_back(response.dump()); + results.push_back(response.dump()); } } else { - auto responses = BatchAndComplete(context_columns, prompt_details.prompt, ScalarFunctionType::FILTER, model); + auto responses = BatchAndComplete(context_columns, prompt, ScalarFunctionType::FILTER, model); results.reserve(responses.size()); for (const auto& response: responses) { @@ -69,17 +73,18 @@ std::vector LlmFilter::Operation(duckdb::DataChunk& args) { } void LlmFilter::Execute(duckdb::DataChunk& args, duckdb::ExpressionState& state, duckdb::Vector& result) { - // Get database instance and generate unique ID for metrics auto& context = state.GetContext(); auto* db = context.db.get(); const void* invocation_id = MetricsManager::GenerateUniqueId(); - // Start metrics tracking MetricsManager::StartInvocation(db, invocation_id, FunctionType::LLM_FILTER); auto exec_start = std::chrono::high_resolution_clock::now(); - const auto results = LlmFilter::Operation(args); + auto& func_expr = state.expr.Cast(); + auto* bind_data = &func_expr.bind_info->Cast(); + + const auto results = LlmFilter::Operation(args, bind_data); auto index = 0; for (const auto& res: results) { diff --git a/src/functions/scalar/llm_filter/registry.cpp b/src/functions/scalar/llm_filter/registry.cpp index d539dcf8..715bce04 100644 --- a/src/functions/scalar/llm_filter/registry.cpp +++ b/src/functions/scalar/llm_filter/registry.cpp @@ -6,7 +6,8 @@ namespace flock { void ScalarRegistry::RegisterLlmFilter(duckdb::ExtensionLoader& loader) { loader.RegisterFunction(duckdb::ScalarFunction("llm_filter", {duckdb::LogicalType::ANY, duckdb::LogicalType::ANY}, - duckdb::LogicalType::VARCHAR, 
LlmFilter::Execute)); + duckdb::LogicalType::VARCHAR, LlmFilter::Execute, + LlmFilter::Bind)); } }// namespace flock diff --git a/src/functions/scalar/scalar.cpp b/src/functions/scalar/scalar.cpp index 6b9335ee..6d022d81 100644 --- a/src/functions/scalar/scalar.cpp +++ b/src/functions/scalar/scalar.cpp @@ -1,7 +1,72 @@ #include "flock/functions/scalar/scalar.hpp" +#include "flock/model_manager/model.hpp" +#include namespace flock { +void ScalarFunctionBase::ValidateArgumentCount( + const duckdb::vector>& arguments, + const std::string& function_name) { + if (arguments.size() != 2) { + throw duckdb::BinderException( + function_name + " requires 2 arguments: (1) model, (2) prompt with context_columns. Got " + + std::to_string(arguments.size())); + } +} + +void ScalarFunctionBase::ValidateArgumentTypes( + const duckdb::vector>& arguments, + const std::string& function_name) { + if (arguments[0]->return_type.id() != duckdb::LogicalTypeId::STRUCT) { + throw duckdb::BinderException(function_name + ": First argument must be model (struct type)"); + } + if (arguments[1]->return_type.id() != duckdb::LogicalTypeId::STRUCT) { + throw duckdb::BinderException( + function_name + ": Second argument must be prompt with context_columns (struct type)"); + } +} + +ScalarFunctionBase::PromptStructInfo ScalarFunctionBase::ExtractPromptStructInfo( + const duckdb::LogicalType& prompt_type) { + PromptStructInfo info{false, std::nullopt, ""}; + + for (idx_t i = 0; i < duckdb::StructType::GetChildCount(prompt_type); i++) { + auto field_name = duckdb::StructType::GetChildName(prompt_type, i); + if (field_name == "context_columns") { + info.has_context_columns = true; + } else if (field_name == "prompt" || field_name == "prompt_name") { + if (!info.prompt_field_index.has_value()) { + info.prompt_field_index = i; + info.prompt_field_name = field_name; + } + } + } + + return info; +} + +void ScalarFunctionBase::ValidatePromptStructFields(const PromptStructInfo& info, + const std::string& 
function_name, + bool require_context_columns) { + if (require_context_columns && !info.has_context_columns) { + throw duckdb::BinderException( + function_name + ": Second argument must contain 'context_columns' field"); + } +} + +void ScalarFunctionBase::InitializeModelJson( + duckdb::ClientContext& context, + const duckdb::unique_ptr& model_expr, + LlmFunctionBindData& bind_data) { + if (!model_expr->IsFoldable()) { + return; + } + + auto model_value = duckdb::ExpressionExecutor::EvaluateScalar(context, *model_expr); + auto user_model_json = CastValueToJson(model_value); + bind_data.model_json = Model::ResolveModelDetailsToJson(user_model_json); +} + nlohmann::json ScalarFunctionBase::Complete(nlohmann::json& columns, const std::string& user_prompt, ScalarFunctionType function_type, Model& model) { const auto [prompt, media_data] = PromptManager::Render(user_prompt, columns, function_type, model.GetModelDetails().tuple_format); @@ -80,4 +145,68 @@ nlohmann::json ScalarFunctionBase::BatchAndComplete(const nlohmann::json& tuples return responses; } +void ScalarFunctionBase::InitializePrompt( + duckdb::ClientContext& context, + const duckdb::unique_ptr& prompt_expr, + LlmFunctionBindData& bind_data) { + nlohmann::json prompt_json; + + if (prompt_expr->IsFoldable()) { + auto prompt_value = duckdb::ExpressionExecutor::EvaluateScalar(context, *prompt_expr); + prompt_json = CastValueToJson(prompt_value); + } else if (prompt_expr->expression_class == duckdb::ExpressionClass::BOUND_FUNCTION) { + auto& func_expr = prompt_expr->Cast(); + const auto& struct_type = prompt_expr->return_type; + + for (idx_t i = 0; i < duckdb::StructType::GetChildCount(struct_type) && i < func_expr.children.size(); i++) { + auto field_name = duckdb::StructType::GetChildName(struct_type, i); + auto& child = func_expr.children[i]; + + if (field_name != "context_columns" && child->IsFoldable()) { + try { + auto field_value = duckdb::ExpressionExecutor::EvaluateScalar(context, *child); + if 
(field_value.type().id() == duckdb::LogicalTypeId::VARCHAR) { + prompt_json[field_name] = field_value.GetValue(); + } else { + prompt_json[field_name] = CastValueToJson(field_value); + } + } catch (...) { + // Skip fields that can't be evaluated + } + } + } + } + + if (prompt_json.contains("context_columns")) { + prompt_json.erase("context_columns"); + } + + auto prompt_details = PromptManager::CreatePromptDetails(prompt_json); + bind_data.prompt = prompt_details.prompt; +} + +duckdb::unique_ptr ScalarFunctionBase::ValidateAndInitializeBindData( + duckdb::ClientContext& context, + duckdb::vector>& arguments, + const std::string& function_name, + bool require_context_columns, + bool initialize_prompt) { + + ValidateArgumentCount(arguments, function_name); + ValidateArgumentTypes(arguments, function_name); + + const auto& prompt_type = arguments[1]->return_type; + auto prompt_info = ExtractPromptStructInfo(prompt_type); + ValidatePromptStructFields(prompt_info, function_name, require_context_columns); + + auto bind_data = duckdb::make_uniq(); + + InitializeModelJson(context, arguments[0], *bind_data); + if (initialize_prompt) { + InitializePrompt(context, arguments[1], *bind_data); + } + + return bind_data; +} + }// namespace flock diff --git a/src/include/flock/functions/scalar/llm_complete.hpp b/src/include/flock/functions/scalar/llm_complete.hpp index e682b7e3..5a5473cf 100644 --- a/src/include/flock/functions/scalar/llm_complete.hpp +++ b/src/include/flock/functions/scalar/llm_complete.hpp @@ -1,13 +1,18 @@ #pragma once +#include "flock/functions/llm_function_bind_data.hpp" #include "flock/functions/scalar/scalar.hpp" namespace flock { class LlmComplete : public ScalarFunctionBase { public: + static duckdb::unique_ptr Bind( + duckdb::ClientContext& context, + duckdb::ScalarFunction& bound_function, + duckdb::vector>& arguments); static void ValidateArguments(duckdb::DataChunk& args); - static std::vector Operation(duckdb::DataChunk& args); + static std::vector 
Operation(duckdb::DataChunk& args, LlmFunctionBindData* bind_data); static void Execute(duckdb::DataChunk& args, duckdb::ExpressionState& state, duckdb::Vector& result); }; diff --git a/src/include/flock/functions/scalar/llm_embedding.hpp b/src/include/flock/functions/scalar/llm_embedding.hpp index 935ded90..2608aadb 100644 --- a/src/include/flock/functions/scalar/llm_embedding.hpp +++ b/src/include/flock/functions/scalar/llm_embedding.hpp @@ -1,13 +1,18 @@ #pragma once +#include "flock/functions/llm_function_bind_data.hpp" #include "flock/functions/scalar/scalar.hpp" namespace flock { class LlmEmbedding : public ScalarFunctionBase { public: + static duckdb::unique_ptr Bind( + duckdb::ClientContext& context, + duckdb::ScalarFunction& bound_function, + duckdb::vector>& arguments); static void ValidateArguments(duckdb::DataChunk& args); - static std::vector> Operation(duckdb::DataChunk& args); + static std::vector> Operation(duckdb::DataChunk& args, LlmFunctionBindData* bind_data); static void Execute(duckdb::DataChunk& args, duckdb::ExpressionState& state, duckdb::Vector& result); }; diff --git a/src/include/flock/functions/scalar/llm_filter.hpp b/src/include/flock/functions/scalar/llm_filter.hpp index 4c391fdd..37490fb5 100644 --- a/src/include/flock/functions/scalar/llm_filter.hpp +++ b/src/include/flock/functions/scalar/llm_filter.hpp @@ -1,13 +1,18 @@ #pragma once +#include "flock/functions/llm_function_bind_data.hpp" #include "flock/functions/scalar/scalar.hpp" namespace flock { class LlmFilter : public ScalarFunctionBase { public: + static duckdb::unique_ptr Bind( + duckdb::ClientContext& context, + duckdb::ScalarFunction& bound_function, + duckdb::vector>& arguments); static void ValidateArguments(duckdb::DataChunk& args); - static std::vector Operation(duckdb::DataChunk& args); + static std::vector Operation(duckdb::DataChunk& args, LlmFunctionBindData* bind_data); static void Execute(duckdb::DataChunk& args, duckdb::ExpressionState& state, duckdb::Vector& 
result); }; diff --git a/src/include/flock/functions/scalar/scalar.hpp b/src/include/flock/functions/scalar/scalar.hpp index ebf4726e..8b514698 100644 --- a/src/include/flock/functions/scalar/scalar.hpp +++ b/src/include/flock/functions/scalar/scalar.hpp @@ -1,9 +1,11 @@ #pragma once #include +#include #include "flock/core/common.hpp" #include "flock/functions/input_parser.hpp" +#include "flock/functions/llm_function_bind_data.hpp" #include "flock/model_manager/model.hpp" #include "flock/prompt_manager/prompt_manager.hpp" #include @@ -11,6 +13,31 @@ namespace flock { class ScalarFunctionBase { +private: + struct PromptStructInfo { + bool has_context_columns; + std::optional prompt_field_index; + std::string prompt_field_name; + }; + + static void ValidateArgumentCount(const duckdb::vector>& arguments, + const std::string& function_name); + + static void ValidateArgumentTypes(const duckdb::vector>& arguments, + const std::string& function_name); + + static PromptStructInfo ExtractPromptStructInfo(const duckdb::LogicalType& prompt_type); + + static void ValidatePromptStructFields(const PromptStructInfo& info, const std::string& function_name, bool require_context_columns); + + static void InitializeModelJson(duckdb::ClientContext& context, + const duckdb::unique_ptr& model_expr, + LlmFunctionBindData& bind_data); + + static void InitializePrompt(duckdb::ClientContext& context, + const duckdb::unique_ptr& prompt_expr, + LlmFunctionBindData& bind_data); + public: ScalarFunctionBase() = delete; @@ -23,6 +50,13 @@ class ScalarFunctionBase { static nlohmann::json BatchAndComplete(const nlohmann::json& tuples, const std::string& user_prompt_name, ScalarFunctionType function_type, Model& model); + + static duckdb::unique_ptr ValidateAndInitializeBindData( + duckdb::ClientContext& context, + duckdb::vector>& arguments, + const std::string& function_name, + bool require_context_columns = true, + bool initialize_prompt = true); }; }// namespace flock From 
b00cc491784230f220c9920cf3eb59baf37d93f6 Mon Sep 17 00:00:00 2001 From: Anas Dorbani Date: Tue, 16 Dec 2025 14:41:28 -0500 Subject: [PATCH 57/59] Refactor aggregate functions to use LlmFunctionBindData structure --- src/functions/aggregate/aggregate.cpp | 128 ++++++++++++++-- src/functions/aggregate/aggregate_state.cpp | 17 --- .../llm_first_or_last/implementation.cpp | 130 +++++++++------- .../aggregate/llm_first_or_last/registry.cpp | 4 +- .../aggregate/llm_reduce/implementation.cpp | 103 +++++++------ .../aggregate/llm_reduce/registry.cpp | 2 +- .../aggregate/llm_rerank/implementation.cpp | 142 +++++++++--------- .../aggregate/llm_rerank/registry.cpp | 2 +- .../flock/functions/aggregate/aggregate.hpp | 66 ++++---- .../functions/aggregate/llm_first_or_last.hpp | 4 + .../flock/functions/aggregate/llm_reduce.hpp | 6 + .../flock/functions/aggregate/llm_rerank.hpp | 7 + 12 files changed, 374 insertions(+), 237 deletions(-) diff --git a/src/functions/aggregate/aggregate.cpp b/src/functions/aggregate/aggregate.cpp index 5f7c3fb0..d7389d25 100644 --- a/src/functions/aggregate/aggregate.cpp +++ b/src/functions/aggregate/aggregate.cpp @@ -1,38 +1,140 @@ #include "flock/functions/aggregate/aggregate.hpp" +#include "flock/model_manager/model.hpp" +#include "flock/prompt_manager/prompt_manager.hpp" +#include namespace flock { -void AggregateFunctionBase::ValidateArguments(duckdb::Vector inputs[], idx_t input_count) { - if (input_count != 3) { - throw std::runtime_error("Expected exactly 3 arguments for aggregate function, got " + std::to_string(input_count)); +void AggregateFunctionBase::ValidateArgumentCount( + const duckdb::vector>& arguments, + const std::string& function_name) { + if (arguments.size() != 2) { + throw duckdb::BinderException( + function_name + " requires 2 arguments: (1) model, (2) prompt with context_columns. 
Got " + + std::to_string(arguments.size())); } +} + +void AggregateFunctionBase::ValidateArgumentTypes( + const duckdb::vector>& arguments, + const std::string& function_name) { + if (arguments[0]->return_type.id() != duckdb::LogicalTypeId::STRUCT) { + throw duckdb::BinderException(function_name + ": First argument must be model (struct type)"); + } + if (arguments[1]->return_type.id() != duckdb::LogicalTypeId::STRUCT) { + throw duckdb::BinderException( + function_name + ": Second argument must be prompt with context_columns (struct type)"); + } +} + +AggregateFunctionBase::PromptStructInfo AggregateFunctionBase::ExtractPromptStructInfo( + const duckdb::LogicalType& prompt_type) { + PromptStructInfo info{false, std::nullopt, ""}; - if (inputs[0].GetType().id() != duckdb::LogicalTypeId::STRUCT) { - throw std::runtime_error("Expected a struct type for model details"); + for (idx_t i = 0; i < duckdb::StructType::GetChildCount(prompt_type); i++) { + auto field_name = duckdb::StructType::GetChildName(prompt_type, i); + if (field_name == "context_columns") { + info.has_context_columns = true; + } else if (field_name == "prompt" || field_name == "prompt_name") { + if (!info.prompt_field_index.has_value()) { + info.prompt_field_index = i; + info.prompt_field_name = field_name; + } + } } - if (inputs[1].GetType().id() != duckdb::LogicalTypeId::STRUCT) { - throw std::runtime_error("Expected a struct type for prompt details"); + return info; +} + +void AggregateFunctionBase::ValidatePromptStructFields(const PromptStructInfo& info, + const std::string& function_name) { + if (!info.has_context_columns) { + throw duckdb::BinderException( + function_name + ": Second argument must contain 'context_columns' field"); + } +} + +void AggregateFunctionBase::InitializeModelJson( + duckdb::ClientContext& context, + const duckdb::unique_ptr& model_expr, + LlmFunctionBindData& bind_data) { + if (!model_expr->IsFoldable()) { + return; } - if (inputs[2].GetType().id() != 
duckdb::LogicalTypeId::STRUCT) { - throw std::runtime_error("Expected a struct type for prompt inputs"); + auto model_value = duckdb::ExpressionExecutor::EvaluateScalar(context, *model_expr); + auto user_model_json = CastValueToJson(model_value); + bind_data.model_json = Model::ResolveModelDetailsToJson(user_model_json); +} + +void AggregateFunctionBase::InitializePrompt( + duckdb::ClientContext& context, + const duckdb::unique_ptr& prompt_expr, + LlmFunctionBindData& bind_data) { + nlohmann::json prompt_json; + + if (prompt_expr->IsFoldable()) { + auto prompt_value = duckdb::ExpressionExecutor::EvaluateScalar(context, *prompt_expr); + prompt_json = CastValueToJson(prompt_value); + } else if (prompt_expr->expression_class == duckdb::ExpressionClass::BOUND_FUNCTION) { + auto& func_expr = prompt_expr->Cast(); + const auto& struct_type = prompt_expr->return_type; + + for (idx_t i = 0; i < duckdb::StructType::GetChildCount(struct_type) && i < func_expr.children.size(); i++) { + auto field_name = duckdb::StructType::GetChildName(struct_type, i); + auto& child = func_expr.children[i]; + + if (field_name != "context_columns" && child->IsFoldable()) { + try { + auto field_value = duckdb::ExpressionExecutor::EvaluateScalar(context, *child); + if (field_value.type().id() == duckdb::LogicalTypeId::VARCHAR) { + prompt_json[field_name] = field_value.GetValue(); + } else { + prompt_json[field_name] = CastValueToJson(field_value); + } + } catch (...) 
{ + // Skip fields that can't be evaluated + } + } + } } + + auto prompt_details = PromptManager::CreatePromptDetails(prompt_json); + bind_data.prompt = prompt_details.prompt; +} + +duckdb::unique_ptr AggregateFunctionBase::ValidateAndInitializeBindData( + duckdb::ClientContext& context, + duckdb::vector>& arguments, + const std::string& function_name) { + + ValidateArgumentCount(arguments, function_name); + ValidateArgumentTypes(arguments, function_name); + + const auto& prompt_type = arguments[1]->return_type; + auto prompt_info = ExtractPromptStructInfo(prompt_type); + ValidatePromptStructFields(prompt_info, function_name); + + auto bind_data = duckdb::make_uniq(); + + InitializeModelJson(context, arguments[0], *bind_data); + InitializePrompt(context, arguments[1], *bind_data); + + return bind_data; } -std::tuple +std::tuple AggregateFunctionBase::CastInputsToJson(duckdb::Vector inputs[], idx_t count) { - auto model_details_json = CastVectorOfStructsToJson(inputs[0], 1); auto prompt_context_json = CastVectorOfStructsToJson(inputs[1], count); auto context_columns = nlohmann::json::array(); if (prompt_context_json.contains("context_columns")) { context_columns = prompt_context_json["context_columns"]; prompt_context_json.erase("context_columns"); } else { - throw std::runtime_error("Expected 'context_columns' in prompt details"); + throw std::runtime_error("Missing 'context_columns' in second argument. 
The prompt struct must include context_columns."); } - return std::make_tuple(model_details_json, prompt_context_json, context_columns); + return std::make_tuple(prompt_context_json, context_columns); } }// namespace flock diff --git a/src/functions/aggregate/aggregate_state.cpp b/src/functions/aggregate/aggregate_state.cpp index 0f23338e..1fa4b797 100644 --- a/src/functions/aggregate/aggregate_state.cpp +++ b/src/functions/aggregate/aggregate_state.cpp @@ -4,8 +4,6 @@ namespace flock { void AggregateFunctionState::Initialize() { value = new nlohmann::json(nlohmann::json::array()); - model_details = nlohmann::json::object(); - user_query = ""; initialized = true; } @@ -26,7 +24,6 @@ void AggregateFunctionState::Update(const nlohmann::json& input) { (*value)[idx]["data"].push_back(item_value); } } else { - // For metadata, only set if not already set if (!(*value)[idx].contains(item.key())) { (*value)[idx][item.key()] = item.value(); } @@ -41,36 +38,25 @@ void AggregateFunctionState::Combine(const AggregateFunctionState& source) { Initialize(); } - // Copy model_details and user_query from source if not already set - if (model_details.empty() && !source.model_details.empty()) { - model_details = source.model_details; - user_query = source.user_query; - } - if (source.value) { auto idx = 0u; for (const auto& column: *source.value) { - // Ensure the target value array has enough elements if (value->size() <= idx) { value->push_back(nlohmann::json::object()); } - // Initialize data array if it doesn't exist if (!(*value)[idx].contains("data")) { (*value)[idx]["data"] = nlohmann::json::array(); } - // Merge column metadata - preserve existing, add new for (const auto& item: column.items()) { if (item.key() == "data") { - // Append data items if (item.value().is_array()) { for (const auto& item_value: item.value()) { (*value)[idx]["data"].push_back(item_value); } } } else { - // For metadata (name, type, etc), only set if not already set if 
(!(*value)[idx].contains(item.key())) { (*value)[idx][item.key()] = item.value(); } @@ -87,9 +73,6 @@ void AggregateFunctionState::Destroy() { delete value; value = nullptr; } - model_details = nlohmann::json::object(); - user_query.clear(); - user_query.shrink_to_fit(); } }// namespace flock diff --git a/src/functions/aggregate/llm_first_or_last/implementation.cpp b/src/functions/aggregate/llm_first_or_last/implementation.cpp index 08fbc651..876e441e 100644 --- a/src/functions/aggregate/llm_first_or_last/implementation.cpp +++ b/src/functions/aggregate/llm_first_or_last/implementation.cpp @@ -1,20 +1,26 @@ #include "flock/core/config.hpp" #include "flock/functions/aggregate/llm_first_or_last.hpp" +#include "flock/functions/llm_function_bind_data.hpp" #include "flock/metrics/manager.hpp" #include #include -#include -#include namespace flock { +duckdb::unique_ptr LlmFirstOrLast::Bind( + duckdb::ClientContext& context, + duckdb::AggregateFunction& function, + duckdb::vector>& arguments) { + return AggregateFunctionBase::ValidateAndInitializeBindData(context, arguments, function.name); +} + int LlmFirstOrLast::GetFirstOrLastTupleId(nlohmann::json& tuples) { - const auto [prompt, media_data] = PromptManager::Render(user_query, tuples, function_type, model.GetModelDetails().tuple_format); + const auto [prompt, media_data] = PromptManager::Render( + user_query, tuples, function_type, model.GetModelDetails().tuple_format); model.AddCompletionRequest(prompt, 1, OutputType::INTEGER, media_data); auto response = model.CollectCompletions()[0]; - // Find flock_row_id column to get valid IDs std::set valid_ids; for (const auto& column: tuples) { if (column.contains("name") && column["name"].is_string() && @@ -29,7 +35,6 @@ int LlmFirstOrLast::GetFirstOrLastTupleId(nlohmann::json& tuples) { } } - // Get LLM response - can be integer or string int result_id_int = -1; std::string result_id_str; if (response["items"][0].is_number_integer()) { @@ -46,10 +51,10 @@ int 
LlmFirstOrLast::GetFirstOrLastTupleId(nlohmann::json& tuples) { } } else { throw std::runtime_error( - "Invalid LLM response: Expected integer or string ID, got: " + response["items"][0].dump()); + "Invalid LLM response: Expected integer or string ID, got: " + + response["items"][0].dump()); } - // Validate that the ID exists in flock_row_id if (valid_ids.find(result_id_str) == valid_ids.end()) { throw std::runtime_error( "Invalid LLM response: The LLM returned ID '" + result_id_str + @@ -62,7 +67,6 @@ int LlmFirstOrLast::GetFirstOrLastTupleId(nlohmann::json& tuples) { nlohmann::json LlmFirstOrLast::Evaluate(nlohmann::json& tuples) { int num_tuples = static_cast(tuples[0]["data"].size()); - // If there's only 1 tuple, no need to call the LLM - just return it if (num_tuples <= 1) { auto result = nlohmann::json::array(); for (auto i = 0; i < static_cast(tuples.size()) - 1; i++) { @@ -83,7 +87,6 @@ nlohmann::json LlmFirstOrLast::Evaluate(nlohmann::json& tuples) { auto batch_tuples = nlohmann::json::array(); int start_index = 0; - model = Model(model_details); auto batch_size = std::min(model.GetModelDetails().batch_size, num_tuples); if (batch_size <= 0) { @@ -91,7 +94,6 @@ nlohmann::json LlmFirstOrLast::Evaluate(nlohmann::json& tuples) { } do { - for (auto i = 0; i < static_cast(tuples.size()); i++) { if (start_index == 0) { batch_tuples.push_back(nlohmann::json::object()); @@ -116,7 +118,6 @@ nlohmann::json LlmFirstOrLast::Evaluate(nlohmann::json& tuples) { auto result_idx = GetFirstOrLastTupleId(batch_tuples); batch_tuples.clear(); - // Build result excluding flock_row_id column (last column) for (auto i = 0; i < static_cast(tuples.size()) - 1; i++) { batch_tuples.push_back(nlohmann::json::object()); for (const auto& item: tuples[i].items()) { @@ -129,7 +130,7 @@ nlohmann::json LlmFirstOrLast::Evaluate(nlohmann::json& tuples) { } } } catch (const ExceededMaxOutputTokensError&) { - start_index -= batch_size;// Retry the current batch with reduced size + start_index 
-= batch_size; batch_size = static_cast(batch_size * 0.9); if (batch_size <= 0) { throw std::runtime_error("Batch size reduced to zero, unable to process tuples"); @@ -144,70 +145,85 @@ nlohmann::json LlmFirstOrLast::Evaluate(nlohmann::json& tuples) { void LlmFirstOrLast::FinalizeResults(duckdb::Vector& states, duckdb::AggregateInputData& aggr_input_data, duckdb::Vector& result, idx_t count, idx_t offset, AggregateFunctionType function_type) { - const auto states_vector = reinterpret_cast(duckdb::FlatVector::GetData(states)); + const auto states_vector = reinterpret_cast( + duckdb::FlatVector::GetData(states)); + + FunctionType metrics_function_type = + (function_type == AggregateFunctionType::FIRST) ? FunctionType::LLM_FIRST : FunctionType::LLM_LAST; - // Map AggregateFunctionType to FunctionType - FunctionType metrics_function_type = (function_type == AggregateFunctionType::FIRST) ? FunctionType::LLM_FIRST : FunctionType::LLM_LAST; + auto& bind_data = aggr_input_data.bind_data->Cast(); + + auto temp_model = bind_data.CreateModel(); + auto model_details_obj = temp_model.GetModelDetails(); auto db = Config::db; std::vector processed_state_ids; - std::string merged_model_name; - std::string merged_provider; - // Process each state individually for (idx_t i = 0; i < count; i++) { auto result_idx = i + offset; auto* state = states_vector[i]; - if (state && state->value && !state->value->empty()) { - // Use model_details and user_query from the state (not static variables) - Model model(state->model_details); - auto model_details_obj = model.GetModelDetails(); - - // Get state ID for metrics - const void* state_id = static_cast(state); - processed_state_ids.push_back(state_id); + if (!state || !state->value || state->value->empty()) { + result.SetValue(result_idx, nullptr); + continue; + } - // Start metrics tracking - MetricsManager::StartInvocation(db, state_id, metrics_function_type); - MetricsManager::SetModelInfo(model_details_obj.model_name, 
model_details_obj.provider_name); + int num_tuples = static_cast((*state->value)[0]["data"].size()); - // Store model info for merged metrics (use first non-empty) - if (merged_model_name.empty() && !model_details_obj.model_name.empty()) { - merged_model_name = model_details_obj.model_name; - merged_provider = model_details_obj.provider_name; + if (num_tuples <= 1) { + auto response = nlohmann::json::array(); + for (auto k = 0; k < static_cast(state->value->size()); k++) { + response.push_back(nlohmann::json::object()); + for (const auto& item: (*state->value)[k].items()) { + if (item.key() == "data") { + response[k]["data"] = nlohmann::json::array(); + if (!item.value().empty()) { + response[k]["data"].push_back(item.value()[0]); + } + } else { + response[k][item.key()] = item.value(); + } + } } + result.SetValue(result_idx, response.dump()); + continue; + } - auto exec_start = std::chrono::high_resolution_clock::now(); + const void* state_id = static_cast(state); + processed_state_ids.push_back(state_id); + MetricsManager::StartInvocation(db, state_id, metrics_function_type); + MetricsManager::SetModelInfo(model_details_obj.model_name, model_details_obj.provider_name); - auto tuples_with_ids = *state->value; - tuples_with_ids.push_back(nlohmann::json::object()); - for (auto j = 0; j < static_cast((*state->value)[0]["data"].size()); j++) { - if (j == 0) { - tuples_with_ids.back()["name"] = "flock_row_id"; - tuples_with_ids.back()["data"] = nlohmann::json::array(); - } - tuples_with_ids.back()["data"].push_back(std::to_string(j)); - } - LlmFirstOrLast function_instance; - function_instance.function_type = function_type; - function_instance.user_query = state->user_query; - function_instance.model_details = state->model_details; - auto response = function_instance.Evaluate(tuples_with_ids); + auto exec_start = std::chrono::high_resolution_clock::now(); - auto exec_end = std::chrono::high_resolution_clock::now(); - double exec_duration_ms = 
std::chrono::duration(exec_end - exec_start).count(); - MetricsManager::AddExecutionTime(exec_duration_ms); + nlohmann::json tuples_with_ids = *state->value; - result.SetValue(result_idx, response.dump()); - } else { - result.SetValue(result_idx, nullptr); + tuples_with_ids.push_back({{"name", "flock_row_id"}, {"data", nlohmann::json::array()}}); + for (int j = 0; j < num_tuples; j++) { + tuples_with_ids.back()["data"].push_back(std::to_string(j)); } + + if (bind_data.prompt.empty()) { + throw std::runtime_error("The prompt cannot be empty"); + } + + LlmFirstOrLast function_instance; + function_instance.function_type = function_type; + function_instance.user_query = bind_data.prompt; + function_instance.model = bind_data.CreateModel(); + auto response = function_instance.Evaluate(tuples_with_ids); + + auto exec_end = std::chrono::high_resolution_clock::now(); + double exec_duration_ms = std::chrono::duration(exec_end - exec_start).count(); + MetricsManager::AddExecutionTime(exec_duration_ms); + + result.SetValue(result_idx, response.dump()); } - // Merge all metrics from processed states into a single metrics entry - MetricsManager::MergeAggregateMetrics(db, processed_state_ids, metrics_function_type, - merged_model_name, merged_provider); + if (!processed_state_ids.empty()) { + MetricsManager::MergeAggregateMetrics(db, processed_state_ids, metrics_function_type, + model_details_obj.model_name, model_details_obj.provider_name); + } } }// namespace flock diff --git a/src/functions/aggregate/llm_first_or_last/registry.cpp b/src/functions/aggregate/llm_first_or_last/registry.cpp index a3c23598..e08f28ec 100644 --- a/src/functions/aggregate/llm_first_or_last/registry.cpp +++ b/src/functions/aggregate/llm_first_or_last/registry.cpp @@ -9,7 +9,7 @@ void AggregateRegistry::RegisterLlmFirst(duckdb::ExtensionLoader& loader) { duckdb::LogicalType::JSON(), duckdb::AggregateFunction::StateSize, LlmFirstOrLast::Initialize, LlmFirstOrLast::Operation, LlmFirstOrLast::Combine, 
LlmFirstOrLast::Finalize, LlmFirstOrLast::SimpleUpdate, - nullptr, LlmFirstOrLast::Destroy)); + LlmFirstOrLast::Bind, LlmFirstOrLast::Destroy)); } void AggregateRegistry::RegisterLlmLast(duckdb::ExtensionLoader& loader) { @@ -18,7 +18,7 @@ void AggregateRegistry::RegisterLlmLast(duckdb::ExtensionLoader& loader) { duckdb::LogicalType::JSON(), duckdb::AggregateFunction::StateSize, LlmFirstOrLast::Initialize, LlmFirstOrLast::Operation, LlmFirstOrLast::Combine, LlmFirstOrLast::Finalize, LlmFirstOrLast::SimpleUpdate, - nullptr, LlmFirstOrLast::Destroy)); + LlmFirstOrLast::Bind, LlmFirstOrLast::Destroy)); } }// namespace flock \ No newline at end of file diff --git a/src/functions/aggregate/llm_reduce/implementation.cpp b/src/functions/aggregate/llm_reduce/implementation.cpp index e43f6232..a34fcc7e 100644 --- a/src/functions/aggregate/llm_reduce/implementation.cpp +++ b/src/functions/aggregate/llm_reduce/implementation.cpp @@ -1,29 +1,39 @@ #include "flock/core/config.hpp" #include "flock/functions/aggregate/llm_reduce.hpp" +#include "flock/functions/llm_function_bind_data.hpp" #include "flock/metrics/manager.hpp" #include -#include namespace flock { -nlohmann::json LlmReduce::ReduceBatch(nlohmann::json& tuples, const AggregateFunctionType& function_type, const nlohmann::json& summary) { - auto [prompt, media_data] = PromptManager::Render(user_query, tuples, function_type, model.GetModelDetails().tuple_format); +duckdb::unique_ptr LlmReduce::Bind( + duckdb::ClientContext& context, + duckdb::AggregateFunction& function, + duckdb::vector>& arguments) { + return AggregateFunctionBase::ValidateAndInitializeBindData(context, arguments, "llm_reduce"); +} + +nlohmann::json LlmReduce::ReduceBatch(nlohmann::json& tuples, + const AggregateFunctionType& function_type, + const nlohmann::json& summary) { + auto [prompt, media_data] = PromptManager::Render( + user_query, tuples, function_type, model.GetModelDetails().tuple_format); prompt += "\n\n" + summary.dump(4); - OutputType 
output_type = OutputType::STRING; - model.AddCompletionRequest(prompt, 1, output_type, media_data); + model.AddCompletionRequest(prompt, 1, OutputType::STRING, media_data); auto response = model.CollectCompletions()[0]; return response["items"][0]; -}; +} nlohmann::json LlmReduce::ReduceLoop(const nlohmann::json& tuples, const AggregateFunctionType& function_type) { auto batch_tuples = nlohmann::json::array(); auto summary = nlohmann::json::object({{"Previous Batch Summary", ""}}); int start_index = 0; - auto batch_size = std::min(model.GetModelDetails().batch_size, static_cast(tuples[0]["data"].size())); + int num_tuples = static_cast(tuples[0]["data"].size()); + auto batch_size = std::min(model.GetModelDetails().batch_size, num_tuples); if (batch_size <= 0) { throw std::runtime_error("Batch size must be greater than zero"); @@ -34,10 +44,8 @@ nlohmann::json LlmReduce::ReduceLoop(const nlohmann::json& tuples, batch_tuples.push_back(nlohmann::json::object()); for (const auto& item: tuples[i].items()) { if (item.key() == "data") { + batch_tuples[i]["data"] = nlohmann::json::array(); for (auto j = 0; j < batch_size && start_index + j < static_cast(item.value().size()); j++) { - if (j == 0) { - batch_tuples[i]["data"] = nlohmann::json::array(); - } batch_tuples[i]["data"].push_back(item.value()[start_index + j]); } } else { @@ -60,7 +68,7 @@ nlohmann::json LlmReduce::ReduceLoop(const nlohmann::json& tuples, } } - } while (start_index < static_cast(tuples[0]["data"].size())); + } while (start_index < num_tuples); return summary["Previous Batch Summary"]; } @@ -68,61 +76,60 @@ nlohmann::json LlmReduce::ReduceLoop(const nlohmann::json& tuples, void LlmReduce::FinalizeResults(duckdb::Vector& states, duckdb::AggregateInputData& aggr_input_data, duckdb::Vector& result, idx_t count, idx_t offset, const AggregateFunctionType function_type) { - const auto states_vector = reinterpret_cast(duckdb::FlatVector::GetData(states)); + const auto states_vector = reinterpret_cast( + 
duckdb::FlatVector::GetData(states)); + + // Get bind data - model_json and prompt are guaranteed to be initialized + auto& bind_data = aggr_input_data.bind_data->Cast(); + + // Get model details for metrics (create temp model just for details) + auto temp_model = bind_data.CreateModel(); + auto model_details_obj = temp_model.GetModelDetails(); auto db = Config::db; std::vector processed_state_ids; - std::string merged_model_name; - std::string merged_provider; // Process each state individually for (idx_t i = 0; i < count; i++) { auto result_idx = i + offset; auto* state = states_vector[i]; - if (state && state->value && !state->value->empty()) { - // Use model_details and user_query from the state - Model model(state->model_details); - auto model_details_obj = model.GetModelDetails(); - - // Get state ID for metrics - const void* state_id = static_cast(state); - processed_state_ids.push_back(state_id); - - // Start metrics tracking for this state - MetricsManager::StartInvocation(db, state_id, FunctionType::LLM_REDUCE); - MetricsManager::SetModelInfo(model_details_obj.model_name, model_details_obj.provider_name); + if (!state || !state->value || state->value->empty()) { + result.SetValue(result_idx, nullptr); + continue; + } - // Store model info for merged metrics (use first non-empty) - if (merged_model_name.empty() && !model_details_obj.model_name.empty()) { - merged_model_name = model_details_obj.model_name; - merged_provider = model_details_obj.provider_name; - } + // Track metrics for this state + const void* state_id = static_cast(state); + processed_state_ids.push_back(state_id); + MetricsManager::StartInvocation(db, state_id, FunctionType::LLM_REDUCE); + MetricsManager::SetModelInfo(model_details_obj.model_name, model_details_obj.provider_name); - auto exec_start = std::chrono::high_resolution_clock::now(); + auto exec_start = std::chrono::high_resolution_clock::now(); - LlmReduce reduce_instance; - reduce_instance.model = Model(state->model_details); - 
reduce_instance.user_query = state->user_query; - auto response = reduce_instance.ReduceLoop(*state->value, function_type); + // Create function instance with bind data and process + // IMPORTANT: Use CreateModel() for thread-safe Model instance + LlmReduce reduce_instance; + reduce_instance.model = bind_data.CreateModel(); + reduce_instance.user_query = bind_data.prompt; + auto response = reduce_instance.ReduceLoop(*state->value, function_type); - auto exec_end = std::chrono::high_resolution_clock::now(); - double exec_duration_ms = std::chrono::duration(exec_end - exec_start).count(); - MetricsManager::AddExecutionTime(exec_duration_ms); + auto exec_end = std::chrono::high_resolution_clock::now(); + double exec_duration_ms = std::chrono::duration(exec_end - exec_start).count(); + MetricsManager::AddExecutionTime(exec_duration_ms); - if (response.is_string()) { - result.SetValue(result_idx, response.get()); - } else { - result.SetValue(result_idx, response.dump()); - } + if (response.is_string()) { + result.SetValue(result_idx, response.get()); } else { - result.SetValue(result_idx, nullptr); + result.SetValue(result_idx, response.dump()); } } - // Merge all metrics from processed states into a single metrics entry - MetricsManager::MergeAggregateMetrics(db, processed_state_ids, FunctionType::LLM_REDUCE, - merged_model_name, merged_provider); + // Merge all metrics from processed states + if (!processed_state_ids.empty()) { + MetricsManager::MergeAggregateMetrics(db, processed_state_ids, FunctionType::LLM_REDUCE, + model_details_obj.model_name, model_details_obj.provider_name); + } } }// namespace flock diff --git a/src/functions/aggregate/llm_reduce/registry.cpp b/src/functions/aggregate/llm_reduce/registry.cpp index 31658023..c3885305 100644 --- a/src/functions/aggregate/llm_reduce/registry.cpp +++ b/src/functions/aggregate/llm_reduce/registry.cpp @@ -9,7 +9,7 @@ void AggregateRegistry::RegisterLlmReduce(duckdb::ExtensionLoader& loader) { 
duckdb::LogicalType::JSON(), duckdb::AggregateFunction::StateSize, LlmReduce::Initialize, LlmReduce::Operation, LlmReduce::Combine, LlmReduce::Finalize, LlmReduce::SimpleUpdate, - nullptr, LlmReduce::Destroy)); + LlmReduce::Bind, LlmReduce::Destroy)); } }// namespace flock \ No newline at end of file diff --git a/src/functions/aggregate/llm_rerank/implementation.cpp b/src/functions/aggregate/llm_rerank/implementation.cpp index 633b31a5..7bdd3c4b 100644 --- a/src/functions/aggregate/llm_rerank/implementation.cpp +++ b/src/functions/aggregate/llm_rerank/implementation.cpp @@ -1,16 +1,23 @@ #include "flock/core/config.hpp" #include "flock/functions/aggregate/llm_rerank.hpp" +#include "flock/functions/llm_function_bind_data.hpp" #include "flock/metrics/manager.hpp" #include #include -#include namespace flock { +duckdb::unique_ptr LlmRerank::Bind( + duckdb::ClientContext& context, + duckdb::AggregateFunction& function, + duckdb::vector>& arguments) { + return AggregateFunctionBase::ValidateAndInitializeBindData(context, arguments, "llm_rerank"); +} + std::vector LlmRerank::RerankBatch(const nlohmann::json& tuples) { - auto [prompt, media_data] = - PromptManager::Render(user_query, tuples, AggregateFunctionType::RERANK, model.GetModelDetails().tuple_format); + auto [prompt, media_data] = PromptManager::Render( + user_query, tuples, AggregateFunctionType::RERANK, model.GetModelDetails().tuple_format); int num_tuples = static_cast(tuples[0]["data"].size()); @@ -74,10 +81,10 @@ std::vector LlmRerank::RerankBatch(const nlohmann::json& tuples) { } return indices; -}; +} nlohmann::json LlmRerank::SlidingWindow(nlohmann::json& tuples) { - const auto num_tuples = static_cast(tuples[0]["data"].size()); + const int num_tuples = static_cast(tuples[0]["data"].size()); // If there's only 1 tuple, no need to call the LLM - just return it if (num_tuples <= 1) { @@ -100,8 +107,7 @@ nlohmann::json LlmRerank::SlidingWindow(nlohmann::json& tuples) { auto final_ranked_tuples = 
nlohmann::json::array(); auto carry_forward_tuples = nlohmann::json::array(); - auto start_index = 0; - model = Model(model_details); + int start_index = 0; auto batch_size = static_cast(model.GetModelDetails().batch_size); if (batch_size == 2048) { @@ -117,18 +123,21 @@ nlohmann::json LlmRerank::SlidingWindow(nlohmann::json& tuples) { // Then add new tuples up to batch_size // Handle case where carry_forward_tuples is empty (first iteration) - auto remaining_space = window_tuples.empty() ? batch_size : (batch_size - static_cast(window_tuples[0]["data"].size())); - auto end_index = std::min(start_index + remaining_space, num_tuples); + int remaining_space = window_tuples.empty() + ? batch_size + : (batch_size - static_cast(window_tuples[0]["data"].size())); + int end_index = std::min(start_index + remaining_space, num_tuples); + for (auto i = 0; i < static_cast(tuples.size()); i++) { if (i >= static_cast(window_tuples.size())) { window_tuples.push_back(nlohmann::json::object()); } for (const auto& item: tuples[i].items()) { if (item.key() == "data") { - for (auto j = start_index; j < end_index; j++) { - if (j == 0) { - window_tuples[i]["data"] = nlohmann::json::array(); - } + if (!window_tuples[i].contains("data")) { + window_tuples[i]["data"] = nlohmann::json::array(); + } + for (int j = start_index; j < end_index; j++) { window_tuples[i]["data"].push_back(item.value()[j]); } } else { @@ -146,13 +155,10 @@ nlohmann::json LlmRerank::SlidingWindow(nlohmann::json& tuples) { } try { + // Build indexed tuples with flock_row_id auto indexed_tuples = window_tuples; - indexed_tuples.push_back(nlohmann::json::object()); - for (auto i = 0; i < static_cast(window_tuples[0]["data"].size()); i++) { - if (i == 0) { - indexed_tuples.back()["name"] = "flock_row_id"; - indexed_tuples.back()["data"] = nlohmann::json::array(); - } + indexed_tuples.push_back({{"name", "flock_row_id"}, {"data", nlohmann::json::array()}}); + for (int i = 0; i < 
static_cast(window_tuples[0]["data"].size()); i++) { indexed_tuples.back()["data"].push_back(std::to_string(i)); } @@ -160,7 +166,7 @@ nlohmann::json LlmRerank::SlidingWindow(nlohmann::json& tuples) { // Initialize final_ranked_tuples structure if needed (first time adding results) if (final_ranked_tuples.empty() && !window_tuples.empty()) { - for (auto i = 0u; i < window_tuples.size(); i++) { + for (size_t i = 0; i < window_tuples.size(); i++) { final_ranked_tuples.push_back(nlohmann::json::object()); // Copy metadata from window_tuples for (const auto& item: window_tuples[i].items()) { @@ -173,9 +179,9 @@ nlohmann::json LlmRerank::SlidingWindow(nlohmann::json& tuples) { } // Add the bottom half to final results (they won't be re-ranked) - auto half_batch = static_cast(ranked_indices.size()) / 2; - for (auto i = half_batch; i < static_cast(ranked_indices.size()); i++) { - auto idx = 0u; + int half_batch = static_cast(ranked_indices.size()) / 2; + for (int i = half_batch; i < static_cast(ranked_indices.size()); i++) { + size_t idx = 0; for (auto& column: window_tuples) { final_ranked_tuples[idx]["data"].push_back(column["data"][ranked_indices[i]]); idx++; @@ -185,7 +191,7 @@ nlohmann::json LlmRerank::SlidingWindow(nlohmann::json& tuples) { // Carry forward top half to next batch for re-ranking // Initialize carry_forward_tuples structure if needed if (carry_forward_tuples.empty() && !window_tuples.empty()) { - for (auto i = 0u; i < window_tuples.size(); i++) { + for (size_t i = 0; i < window_tuples.size(); i++) { carry_forward_tuples.push_back(nlohmann::json::object()); // Copy metadata from window_tuples for (const auto& item: window_tuples[i].items()) { @@ -196,8 +202,8 @@ nlohmann::json LlmRerank::SlidingWindow(nlohmann::json& tuples) { carry_forward_tuples[i]["data"] = nlohmann::json::array(); } } - for (auto i = 0; i < half_batch; i++) { - auto idx = 0u; + for (int i = 0; i < half_batch; i++) { + size_t idx = 0; for (auto& column: window_tuples) { 
carry_forward_tuples[idx]["data"].push_back(column["data"][ranked_indices[i]]); idx++; @@ -208,10 +214,10 @@ nlohmann::json LlmRerank::SlidingWindow(nlohmann::json& tuples) { // If we've processed all input tuples, add remaining carry forward to final results if (start_index >= num_tuples && !carry_forward_tuples.empty()) { - auto idx = 0u; + size_t idx = 0; for (const auto& column: carry_forward_tuples) { - for (const auto& i: column["data"]) { - final_ranked_tuples[idx]["data"].push_back(i); + for (const auto& data_item: column["data"]) { + final_ranked_tuples[idx]["data"].push_back(data_item); } idx++; } @@ -230,63 +236,61 @@ nlohmann::json LlmRerank::SlidingWindow(nlohmann::json& tuples) { return final_ranked_tuples; } -void LlmRerank::Finalize(duckdb::Vector& states, duckdb::AggregateInputData& aggr_input_data, duckdb::Vector& result, - idx_t count, idx_t offset) { - const auto states_vector = reinterpret_cast(duckdb::FlatVector::GetData(states)); +void LlmRerank::Finalize(duckdb::Vector& states, duckdb::AggregateInputData& aggr_input_data, + duckdb::Vector& result, idx_t count, idx_t offset) { + const auto states_vector = reinterpret_cast( + duckdb::FlatVector::GetData(states)); + + // Get bind data - model_json and prompt are guaranteed to be initialized + auto& bind_data = aggr_input_data.bind_data->Cast(); + + // Get model details for metrics (create temp model just for details) + auto temp_model = bind_data.CreateModel(); + auto model_details_obj = temp_model.GetModelDetails(); auto db = Config::db; std::vector processed_state_ids; - std::string merged_model_name; - std::string merged_provider; // Process each state individually for (idx_t i = 0; i < count; i++) { auto result_idx = i + offset; auto* state = states_vector[i]; - if (state && state->value && !state->value->empty()) { - // Use model_details and user_query from the state (not static variables) - Model model(state->model_details); - auto model_details_obj = model.GetModelDetails(); - - // Get 
state ID for metrics - const void* state_id = static_cast(state); - processed_state_ids.push_back(state_id); + if (!state || !state->value || state->value->empty()) { + result.SetValue(result_idx, nullptr); + continue; + } - // Start metrics tracking - MetricsManager::StartInvocation(db, state_id, FunctionType::LLM_RERANK); - MetricsManager::SetModelInfo(model_details_obj.model_name, model_details_obj.provider_name); + // Track metrics for this state + const void* state_id = static_cast(state); + processed_state_ids.push_back(state_id); + MetricsManager::StartInvocation(db, state_id, FunctionType::LLM_RERANK); + MetricsManager::SetModelInfo(model_details_obj.model_name, model_details_obj.provider_name); - // Store model info for merged metrics (use first non-empty) - if (merged_model_name.empty() && !model_details_obj.model_name.empty()) { - merged_model_name = model_details_obj.model_name; - merged_provider = model_details_obj.provider_name; - } + auto exec_start = std::chrono::high_resolution_clock::now(); - auto exec_start = std::chrono::high_resolution_clock::now(); + // Copy state value to avoid potential use-after-free issues + nlohmann::json tuples = *state->value; - auto tuples_with_ids = nlohmann::json::array(); - for (auto j = 0; j < static_cast(state->value->size()); j++) { - tuples_with_ids.push_back((*state->value)[j]); - } - LlmRerank function_instance; - function_instance.user_query = state->user_query; - function_instance.model_details = state->model_details; - auto reranked_tuples = function_instance.SlidingWindow(tuples_with_ids); + // Create function instance with bind data + // IMPORTANT: Use CreateModel() for thread-safe Model instance + LlmRerank function_instance; + function_instance.user_query = bind_data.prompt; + function_instance.model = bind_data.CreateModel(); + auto reranked_tuples = function_instance.SlidingWindow(tuples); - auto exec_end = std::chrono::high_resolution_clock::now(); - double exec_duration_ms = 
std::chrono::duration(exec_end - exec_start).count(); - MetricsManager::AddExecutionTime(exec_duration_ms); + auto exec_end = std::chrono::high_resolution_clock::now(); + double exec_duration_ms = std::chrono::duration(exec_end - exec_start).count(); + MetricsManager::AddExecutionTime(exec_duration_ms); - result.SetValue(result_idx, reranked_tuples.dump()); - } else { - result.SetValue(result_idx, nullptr); - } + result.SetValue(result_idx, reranked_tuples.dump()); } - // Merge all metrics from processed states into a single metrics entry - MetricsManager::MergeAggregateMetrics(db, processed_state_ids, FunctionType::LLM_RERANK, - merged_model_name, merged_provider); + // Merge all metrics from processed states + if (!processed_state_ids.empty()) { + MetricsManager::MergeAggregateMetrics(db, processed_state_ids, FunctionType::LLM_RERANK, + model_details_obj.model_name, model_details_obj.provider_name); + } } }// namespace flock diff --git a/src/functions/aggregate/llm_rerank/registry.cpp b/src/functions/aggregate/llm_rerank/registry.cpp index 74739b70..5438606e 100644 --- a/src/functions/aggregate/llm_rerank/registry.cpp +++ b/src/functions/aggregate/llm_rerank/registry.cpp @@ -8,7 +8,7 @@ void AggregateRegistry::RegisterLlmRerank(duckdb::ExtensionLoader& loader) { "llm_rerank", {duckdb::LogicalType::ANY, duckdb::LogicalType::ANY}, duckdb::LogicalType::JSON(), duckdb::AggregateFunction::StateSize, LlmRerank::Initialize, LlmRerank::Operation, LlmRerank::Combine, LlmRerank::Finalize, LlmRerank::SimpleUpdate, - nullptr, LlmRerank::Destroy)); + LlmRerank::Bind, LlmRerank::Destroy)); } }// namespace flock diff --git a/src/include/flock/functions/aggregate/aggregate.hpp b/src/include/flock/functions/aggregate/aggregate.hpp index 1f3963e2..40d34bfd 100644 --- a/src/include/flock/functions/aggregate/aggregate.hpp +++ b/src/include/flock/functions/aggregate/aggregate.hpp @@ -2,9 +2,11 @@ #include "flock/core/common.hpp" #include "flock/functions/input_parser.hpp" +#include 
"flock/functions/llm_function_bind_data.hpp" #include "flock/metrics/manager.hpp" #include "flock/model_manager/model.hpp" #include +#include namespace flock { @@ -12,10 +14,8 @@ class AggregateFunctionState { public: nlohmann::basic_json<>* value; bool initialized; - nlohmann::json model_details; - std::string user_query; - AggregateFunctionState() : value(nullptr), initialized(false), model_details(nlohmann::json::object()), user_query("") {} + AggregateFunctionState() : value(nullptr), initialized(false) {} ~AggregateFunctionState() { if (value) { @@ -34,37 +34,58 @@ class AggregateFunctionBase { public: Model model; std::string user_query; - nlohmann::json model_details; public: explicit AggregateFunctionBase() = default; +private: + struct PromptStructInfo { + bool has_context_columns; + std::optional prompt_field_index; + std::string prompt_field_name; + }; + + static void ValidateArgumentCount(const duckdb::vector>& arguments, + const std::string& function_name); + + static void ValidateArgumentTypes(const duckdb::vector>& arguments, + const std::string& function_name); + + static PromptStructInfo ExtractPromptStructInfo(const duckdb::LogicalType& prompt_type); + + static void ValidatePromptStructFields(const PromptStructInfo& info, const std::string& function_name); + + static void InitializeModelJson(duckdb::ClientContext& context, + const duckdb::unique_ptr& model_expr, + LlmFunctionBindData& bind_data); + + static void InitializePrompt(duckdb::ClientContext& context, + const duckdb::unique_ptr& prompt_expr, + LlmFunctionBindData& bind_data); + public: - static void ValidateArguments(duckdb::Vector inputs[], idx_t input_count); - static std::tuple + static std::tuple CastInputsToJson(duckdb::Vector inputs[], idx_t count); + static duckdb::unique_ptr ValidateAndInitializeBindData( + duckdb::ClientContext& context, + duckdb::vector>& arguments, + const std::string& function_name); + static bool IgnoreNull() { return true; }; + template static void 
Initialize(const duckdb::AggregateFunction&, duckdb::data_ptr_t state_p) { auto state = reinterpret_cast(state_p); - - // Use placement new to properly construct the AggregateFunctionState object - // This handles memory allocation done by DuckDB new (state) AggregateFunctionState(); - - // Initialize the state (allocates JSON array, resets all fields) state->Initialize(); } template static void Operation(duckdb::Vector inputs[], duckdb::AggregateInputData& aggr_input_data, idx_t input_count, duckdb::Vector& states, idx_t count) { - // ValidateArguments(inputs, input_count); - - auto [model_details_json, prompt_details, columns] = CastInputsToJson(inputs, count); - auto prompt_str = PromptManager::CreatePromptDetails(prompt_details).prompt; + auto [prompt_details, columns] = CastInputsToJson(inputs, count); auto state_map_p = reinterpret_cast(duckdb::FlatVector::GetData(states)); @@ -85,11 +106,6 @@ class AggregateFunctionBase { } if (state) { - // Store model_details and user_query in the state (only set once, on first update) - if (state->model_details.empty()) { - state->model_details = model_details_json; - state->user_query = prompt_str; - } state->Update(tuple); } } @@ -98,17 +114,9 @@ class AggregateFunctionBase { template static void SimpleUpdate(duckdb::Vector inputs[], duckdb::AggregateInputData& aggr_input_data, idx_t input_count, duckdb::data_ptr_t state_p, idx_t count) { - // ValidateArguments(inputs, input_count); - - auto [model_details_json, prompt_details, tuples] = CastInputsToJson(inputs, count); - auto prompt_str = PromptManager::CreatePromptDetails(prompt_details).prompt; + auto [prompt_details, tuples] = CastInputsToJson(inputs, count); if (const auto state = reinterpret_cast(state_p)) { - // Store model_details and user_query in the state (only set once, on first update) - if (state->model_details.empty()) { - state->model_details = model_details_json; - state->user_query = prompt_str; - } state->Update(tuples); } } diff --git 
a/src/include/flock/functions/aggregate/llm_first_or_last.hpp b/src/include/flock/functions/aggregate/llm_first_or_last.hpp index bd4cbac0..f8970db7 100644 --- a/src/include/flock/functions/aggregate/llm_first_or_last.hpp +++ b/src/include/flock/functions/aggregate/llm_first_or_last.hpp @@ -1,6 +1,7 @@ #pragma once #include "flock/functions/aggregate/aggregate.hpp" +#include "flock/functions/llm_function_bind_data.hpp" namespace flock { @@ -14,6 +15,9 @@ class LlmFirstOrLast : public AggregateFunctionBase { int GetFirstOrLastTupleId(nlohmann::json& tuples); nlohmann::json Evaluate(nlohmann::json& tuples); + static duckdb::unique_ptr Bind(duckdb::ClientContext& context, duckdb::AggregateFunction& function, duckdb::vector>& arguments); + + public: static void Initialize(const duckdb::AggregateFunction& function, duckdb::data_ptr_t state_p) { AggregateFunctionBase::Initialize(function, state_p); diff --git a/src/include/flock/functions/aggregate/llm_reduce.hpp b/src/include/flock/functions/aggregate/llm_reduce.hpp index e5fe633b..7b663eac 100644 --- a/src/include/flock/functions/aggregate/llm_reduce.hpp +++ b/src/include/flock/functions/aggregate/llm_reduce.hpp @@ -1,6 +1,7 @@ #pragma once #include "flock/functions/aggregate/aggregate.hpp" +#include "flock/functions/llm_function_bind_data.hpp" namespace flock { @@ -12,6 +13,11 @@ class LlmReduce : public AggregateFunctionBase { nlohmann::json ReduceLoop(const nlohmann::json& tuples, const AggregateFunctionType& function_type); public: + static duckdb::unique_ptr Bind( + duckdb::ClientContext& context, + duckdb::AggregateFunction& function, + duckdb::vector>& arguments); + static void Initialize(const duckdb::AggregateFunction& function, duckdb::data_ptr_t state_p) { AggregateFunctionBase::Initialize(function, state_p); } diff --git a/src/include/flock/functions/aggregate/llm_rerank.hpp b/src/include/flock/functions/aggregate/llm_rerank.hpp index 4ff7d137..c6d2d41d 100644 --- 
a/src/include/flock/functions/aggregate/llm_rerank.hpp +++ b/src/include/flock/functions/aggregate/llm_rerank.hpp @@ -1,6 +1,7 @@ #pragma once #include "flock/functions/aggregate/aggregate.hpp" +#include "flock/functions/llm_function_bind_data.hpp" namespace flock { @@ -11,6 +12,12 @@ class LlmRerank : public AggregateFunctionBase { nlohmann::json SlidingWindow(nlohmann::json& tuples); std::vector RerankBatch(const nlohmann::json& tuples); +public: + static duckdb::unique_ptr Bind( + duckdb::ClientContext& context, + duckdb::AggregateFunction& function, + duckdb::vector>& arguments); + static void Initialize(const duckdb::AggregateFunction& function, duckdb::data_ptr_t state_p) { AggregateFunctionBase::Initialize(function, state_p); } From 990fc7f8ff6583b4905e34021e414eb39bd306b9 Mon Sep 17 00:00:00 2001 From: Anas Dorbani Date: Tue, 16 Dec 2025 14:41:35 -0500 Subject: [PATCH 58/59] Update tests to match new function signatures --- .../aggregate/llm_aggregate_function_test_base.hpp | 10 +++++++++- test/unit/functions/aggregate/llm_reduce_json.cpp | 4 ++-- test/unit/functions/scalar/llm_complete.cpp | 2 +- test/unit/functions/scalar/llm_embedding.cpp | 2 +- test/unit/functions/scalar/llm_filter.cpp | 4 ++-- 5 files changed, 15 insertions(+), 7 deletions(-) diff --git a/test/unit/functions/aggregate/llm_aggregate_function_test_base.hpp b/test/unit/functions/aggregate/llm_aggregate_function_test_base.hpp index 0f75c0bc..c84cac5c 100644 --- a/test/unit/functions/aggregate/llm_aggregate_function_test_base.hpp +++ b/test/unit/functions/aggregate/llm_aggregate_function_test_base.hpp @@ -31,8 +31,16 @@ class LLMAggregateTestBase : public ::testing::Test { " TYPE OLLAMA," " API_URL '127.0.0.1:11434');"); + // Create a shared mock provider for expectations mock_provider = std::make_shared(ModelDetails{}); - Model::SetMockProvider(mock_provider); + + // Use factory pattern so each Model gets a fresh mock instance + // This is thread-safe for parallel GROUP BY processing + 
Model::SetMockProviderFactory([this]() { + // Return the same mock for expectation purposes, but each Model + // instance calls this factory, so we can track expectations + return mock_provider; + }); } void TearDown() override { diff --git a/test/unit/functions/aggregate/llm_reduce_json.cpp b/test/unit/functions/aggregate/llm_reduce_json.cpp index 28c43f3b..cfef2ad2 100644 --- a/test/unit/functions/aggregate/llm_reduce_json.cpp +++ b/test/unit/functions/aggregate/llm_reduce_json.cpp @@ -5,7 +5,7 @@ namespace flock { class LLMReduceJsonTest : public LLMAggregateTestBase { protected: - static constexpr const char* EXPECTED_JSON_RESPONSE = R"({"items": [{"summary": "A comprehensive summary of running shoes, wireless headphones, and smart watches, featuring advanced technology and user-friendly designs for active lifestyles."}]})"; + static constexpr const char* EXPECTED_JSON_RESPONSE = R"({"items": [{"summary": "A comprehensive summary of some products"}]})"; std::string GetExpectedResponse() const override { return EXPECTED_JSON_RESPONSE; @@ -145,7 +145,7 @@ TEST_F(LLMReduceJsonTest, Operation_LargeInputSet_ProcessesCorrectly) { const auto results = con.Query( "SELECT id, " + GetFunctionName() + "(" "{'model_name': 'gpt-4o'}, " - "{'prompt': 'Create a JSON summary of all product descriptions with summary, total_items, and status fields', 'context_columns': [{'data': id::TEXT}, {'data': 'Product description ' || id::TEXT}]}" + "{'prompt': 'Create a JSON summary of all product descriptions with summary, total_items, and status fields', 'context_columns': [{'data': id::VARCHAR}, {'data': 'Product description ' || id::VARCHAR}]}" ") AS large_json_summary FROM range(" + std::to_string(input_count) + ") AS t(id) GROUP BY id;"); diff --git a/test/unit/functions/scalar/llm_complete.cpp b/test/unit/functions/scalar/llm_complete.cpp index 74ed613b..c2936597 100644 --- a/test/unit/functions/scalar/llm_complete.cpp +++ b/test/unit/functions/scalar/llm_complete.cpp @@ -145,7 
+145,7 @@ TEST_F(LLMCompleteTest, Operation_LargeInputSet_ProcessesCorrectly) { auto query = "SELECT " + GetFunctionName() + "({'model_name': 'gpt-4o'}, " + "{'prompt': 'Summarize the following text', " + - " 'context_columns': [{'data': 'Input text ' || i::TEXT}]}) AS result " + + " 'context_columns': [{'data': 'Input text ' || i::VARCHAR}]}) AS result " + "FROM range(" + std::to_string(input_count) + ") AS t(i);"; const auto results = con.Query(query); diff --git a/test/unit/functions/scalar/llm_embedding.cpp b/test/unit/functions/scalar/llm_embedding.cpp index c4852dd5..b5d17e7c 100644 --- a/test/unit/functions/scalar/llm_embedding.cpp +++ b/test/unit/functions/scalar/llm_embedding.cpp @@ -143,7 +143,7 @@ TEST_F(LLMEmbeddingTest, Operation_LargeInputSet_ProcessesCorrectly) { .WillOnce(::testing::Return(std::vector{expected_response})); auto con = Config::GetConnection(); - const auto results = con.Query("SELECT " + GetFunctionName() + "({'model_name': 'text-embedding-3-small'}, {'context_columns': [{'data': content}]}) AS embedding FROM range(" + std::to_string(input_count) + ") AS t(i), unnest(['Document content number ' || i::TEXT]) as tbl(content);"); + const auto results = con.Query("SELECT " + GetFunctionName() + "({'model_name': 'text-embedding-3-small'}, {'context_columns': [{'data': content}]}) AS embedding FROM range(" + std::to_string(input_count) + ") AS t(i), unnest(['Document content number ' || i::VARCHAR]) as tbl(content);"); ASSERT_TRUE(!results->HasError()) << "Query failed: " << results->GetError(); ASSERT_EQ(results->RowCount(), input_count); diff --git a/test/unit/functions/scalar/llm_filter.cpp b/test/unit/functions/scalar/llm_filter.cpp index 77001834..66d1fbb0 100644 --- a/test/unit/functions/scalar/llm_filter.cpp +++ b/test/unit/functions/scalar/llm_filter.cpp @@ -70,7 +70,7 @@ TEST_F(LLMFilterTest, LLMFilterWithoutContextColumns) { .WillOnce(::testing::Return(std::vector{expected_response})); auto con = Config::GetConnection(); - const 
auto results = con.Query("SELECT " + GetFunctionName() + "({'model_name': 'gpt-4o'}, {'prompt': 'Is paris the best capital in the world?'}) AS filter_result;"); + const auto results = con.Query("SELECT " + GetFunctionName() + "({'model_name': 'gpt-4o'}, {'prompt': 'Are you a Robot?'}) AS filter_result;"); ASSERT_EQ(results->RowCount(), 1); ASSERT_EQ(results->GetValue(0, 0).GetValue(), "true"); } @@ -118,7 +118,7 @@ TEST_F(LLMFilterTest, Operation_LargeInputSet_ProcessesCorrectly) { .WillOnce(::testing::Return(std::vector{expected_response})); auto con = Config::GetConnection(); - const auto results = con.Query("SELECT " + GetFunctionName() + "({'model_name': 'gpt-4o'}, {'prompt': 'Is this content spam?', 'context_columns': [{'data': content}]}) AS result FROM range(" + std::to_string(input_count) + ") AS t(i), unnest(['Content item ' || i::TEXT]) as tbl(content);"); + const auto results = con.Query("SELECT " + GetFunctionName() + "({'model_name': 'gpt-4o'}, {'prompt': 'Is this content spam?', 'context_columns': [{'data': content}]}) AS result FROM range(" + std::to_string(input_count) + ") AS t(i), unnest(['Content item ' || i::VARCHAR]) as tbl(content);"); ASSERT_TRUE(!results->HasError()) << "Query failed: " << results->GetError(); ASSERT_EQ(results->RowCount(), input_count); From 7a6288f361c01d1fd4c393b7c46b5f670e6ce849 Mon Sep 17 00:00:00 2001 From: Anas Dorbani Date: Tue, 16 Dec 2025 14:51:58 -0500 Subject: [PATCH 59/59] Fix duplicate check for CREATE MODEL and CREATE PROMPT to check all tables --- src/custom_parser/query/model_parser.cpp | 9 ++++++--- src/custom_parser/query/prompt_parser.cpp | 9 ++++++--- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/src/custom_parser/query/model_parser.cpp b/src/custom_parser/query/model_parser.cpp index 7d422834..ab7ecf66 100644 --- a/src/custom_parser/query/model_parser.cpp +++ b/src/custom_parser/query/model_parser.cpp @@ -305,16 +305,19 @@ std::string ModelParser::ToSQL(const QueryStatement& statement) 
const { case StatementType::CREATE_MODEL: { const auto& create_stmt = static_cast(statement); query = ExecuteQueryWithStorage([&create_stmt](duckdb::Connection& con) { - // Check if model already exists auto result = con.Query(duckdb_fmt::format( " SELECT model_name" " FROM flock_storage.flock_config.FLOCKMTL_MODEL_DEFAULT_INTERNAL_TABLE" " WHERE model_name = '{}'" " UNION ALL " " SELECT model_name " - " FROM {}flock_config.FLOCKMTL_MODEL_USER_DEFINED_INTERNAL_TABLE" + " FROM flock_storage.flock_config.FLOCKMTL_MODEL_USER_DEFINED_INTERNAL_TABLE" + " WHERE model_name = '{}'" + " UNION ALL " + " SELECT model_name " + " FROM flock_config.FLOCKMTL_MODEL_USER_DEFINED_INTERNAL_TABLE" " WHERE model_name = '{}';", - create_stmt.model_name, create_stmt.catalog.empty() ? "flock_storage." : "", create_stmt.model_name)); + create_stmt.model_name, create_stmt.model_name, create_stmt.model_name)); auto& materialized_result = result->Cast(); if (materialized_result.RowCount() != 0) { diff --git a/src/custom_parser/query/prompt_parser.cpp b/src/custom_parser/query/prompt_parser.cpp index 7f4bcfa5..5ba21297 100644 --- a/src/custom_parser/query/prompt_parser.cpp +++ b/src/custom_parser/query/prompt_parser.cpp @@ -219,10 +219,13 @@ std::string PromptParser::ToSQL(const QueryStatement& statement) const { const auto& create_stmt = static_cast(statement); query = ExecuteQueryWithStorage([&create_stmt](duckdb::Connection& con) { auto result = con.Query(duckdb_fmt::format(" SELECT prompt_name " - " FROM {}flock_config.FLOCKMTL_PROMPT_INTERNAL_TABLE" + " FROM flock_storage.flock_config.FLOCKMTL_PROMPT_INTERNAL_TABLE" + " WHERE prompt_name = '{}'" + " UNION ALL " + " SELECT prompt_name " + " FROM flock_config.FLOCKMTL_PROMPT_INTERNAL_TABLE" " WHERE prompt_name = '{}';", - create_stmt.catalog.empty() ? "flock_storage." : "", - create_stmt.prompt_name)); + create_stmt.prompt_name, create_stmt.prompt_name)); auto& materialized_result = result->Cast(); if (materialized_result.RowCount() != 0) {