From db4ce8d39e5ea44b04f9aa88e6d3a5cfacbc4774 Mon Sep 17 00:00:00 2001 From: Mengwei Liu Date: Fri, 10 Oct 2025 10:08:03 -0700 Subject: [PATCH] Revert "[multimodal] Allow generate and prefill to take move sematics (#14643)" This reverts commit db8d04fafba92b007c4a6e60d000930b160d91bc. --- extension/llm/runner/multimodal_runner.cpp | 15 -------------- extension/llm/runner/multimodal_runner.h | 24 ---------------------- 2 files changed, 39 deletions(-) diff --git a/extension/llm/runner/multimodal_runner.cpp b/extension/llm/runner/multimodal_runner.cpp index c1c99ad6c9f..8b7e4e315d8 100644 --- a/extension/llm/runner/multimodal_runner.cpp +++ b/extension/llm/runner/multimodal_runner.cpp @@ -62,11 +62,6 @@ Error MultimodalRunner::load() { ET_LOG(Info, format, __VA_ARGS__); \ } -Error MultimodalRunner::prefill(std::vector&& inputs) { - // Forward to the const reference version - return prefill(inputs); -} - Error MultimodalRunner::prefill(const std::vector& inputs) { if (!is_loaded()) { ET_CHECK_OK_OR_RETURN_ERROR(load()); @@ -77,16 +72,6 @@ Error MultimodalRunner::prefill(const std::vector& inputs) { return Error::Ok; } -Error MultimodalRunner::generate( - std::vector&& inputs, - const GenerationConfig& config, - std::function token_callback, - std::function stats_callback) { - // Forward to the const reference version - return generate( - inputs, config, std::move(token_callback), std::move(stats_callback)); -} - Error MultimodalRunner::generate( const std::vector& inputs, const GenerationConfig& config, diff --git a/extension/llm/runner/multimodal_runner.h b/extension/llm/runner/multimodal_runner.h index eccf5bde301..caf3c296038 100644 --- a/extension/llm/runner/multimodal_runner.h +++ b/extension/llm/runner/multimodal_runner.h @@ -119,21 +119,6 @@ class ET_EXPERIMENTAL MultimodalRunner { std::function token_callback = {}, std::function stats_callback = {}); - /** - * Generate tokens from multimodal inputs with move semantics. - * This overload allows efficient transfer of temporary vectors. - * @param inputs A vector of MultimodalInput objects (moved). - * @param config Generation configuration parameters. - * @param token_callback Callback function called for each generated token. - * @param stats_callback Callback function for generation statistics. - * @return The error code. KV cache position is tracked internally in pos_. - */ - virtual ::executorch::runtime::Error generate( - std::vector&& inputs, - const GenerationConfig& config, - std::function token_callback = {}, - std::function stats_callback = {}); - /** * Prefill multimodal inputs, for example to reload chat history. * @param inputs A vector of MultimodalInput objects containing images and @@ -143,15 +128,6 @@ class ET_EXPERIMENTAL MultimodalRunner { virtual ::executorch::runtime::Error prefill( const std::vector& inputs); - /** - * Prefill multimodal inputs with move semantics. - * This overload allows efficient transfer of temporary vectors. - * @param inputs A vector of MultimodalInput objects (moved). - * @return The error code. KV cache position is tracked internally in pos_. - */ - virtual ::executorch::runtime::Error prefill( - std::vector&& inputs); - inline void stop() { text_token_generator_->stop(); }