From 0e60d1b32a08e6dd2bfdac8de3b769d302c0a8e2 Mon Sep 17 00:00:00 2001 From: Mengwei Liu Date: Sat, 27 Sep 2025 11:44:29 -0700 Subject: [PATCH] [multimodal] Allow generate and prefill to take move semantics As titled --- extension/llm/runner/multimodal_runner.cpp | 15 ++++++++++++++ extension/llm/runner/multimodal_runner.h | 24 ++++++++++++++++++++++ 2 files changed, 39 insertions(+) diff --git a/extension/llm/runner/multimodal_runner.cpp b/extension/llm/runner/multimodal_runner.cpp index 8b7e4e315d8..c1c99ad6c9f 100644 --- a/extension/llm/runner/multimodal_runner.cpp +++ b/extension/llm/runner/multimodal_runner.cpp @@ -62,6 +62,11 @@ Error MultimodalRunner::load() { ET_LOG(Info, format, __VA_ARGS__); \ } +Error MultimodalRunner::prefill(std::vector&& inputs) { + // Forward to the const reference version + return prefill(inputs); +} + Error MultimodalRunner::prefill(const std::vector& inputs) { if (!is_loaded()) { ET_CHECK_OK_OR_RETURN_ERROR(load()); @@ -72,6 +77,16 @@ Error MultimodalRunner::prefill(const std::vector& inputs) { return Error::Ok; } +Error MultimodalRunner::generate( + std::vector&& inputs, + const GenerationConfig& config, + std::function token_callback, + std::function stats_callback) { + // Forward to the const reference version + return generate( + inputs, config, std::move(token_callback), std::move(stats_callback)); +} + Error MultimodalRunner::generate( const std::vector& inputs, const GenerationConfig& config, diff --git a/extension/llm/runner/multimodal_runner.h b/extension/llm/runner/multimodal_runner.h index caf3c296038..eccf5bde301 100644 --- a/extension/llm/runner/multimodal_runner.h +++ b/extension/llm/runner/multimodal_runner.h @@ -119,6 +119,21 @@ class ET_EXPERIMENTAL MultimodalRunner { std::function token_callback = {}, std::function stats_callback = {}); + /** + * Generate tokens from multimodal inputs with move semantics. + * This overload allows efficient transfer of temporary vectors. 
+ * @param inputs A vector of MultimodalInput objects (moved). + * @param config Generation configuration parameters. + * @param token_callback Callback function called for each generated token. + * @param stats_callback Callback function for generation statistics. + * @return The error code. KV cache position is tracked internally in pos_. + */ + virtual ::executorch::runtime::Error generate( + std::vector&& inputs, + const GenerationConfig& config, + std::function token_callback = {}, + std::function stats_callback = {}); + /** * Prefill multimodal inputs, for example to reload chat history. * @param inputs A vector of MultimodalInput objects containing images and @@ -128,6 +143,15 @@ class ET_EXPERIMENTAL MultimodalRunner { virtual ::executorch::runtime::Error prefill( const std::vector& inputs); + /** + * Prefill multimodal inputs with move semantics. + * This overload allows efficient transfer of temporary vectors. + * @param inputs A vector of MultimodalInput objects (moved). + * @return The error code. KV cache position is tracked internally in pos_. + */ + virtual ::executorch::runtime::Error prefill( + std::vector&& inputs); + inline void stop() { text_token_generator_->stop(); }