From 8481fd59d438f0a40abe87c65dec5732ae2a6b93 Mon Sep 17 00:00:00 2001
From: Github Executorch
Date: Wed, 4 Feb 2026 18:22:06 -0800
Subject: [PATCH] Support multimethod in runner

Allow specifying which method the runner executes instead of always
calling the hardcoded "forward" method.

Also update CLAUDE.md with fbcode/xplat guidance.
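Example usage (illustrative only; assumes a model exported with an
additional "lora_forward" method, and the llama example binary built as
llama_main):

    # CLI: select a non-default method via the new flag
    llama_main --model_path=llama.pte --tokenizer_path=tokenizer.model \
        --method_name=lora_forward

    // C++: the new trailing parameter on the factory functions
    auto runner = example::create_llama_runner(
        "llama.pte",
        "tokenizer.model",
        /*data_files=*/{},
        /*temperature=*/-1.0f,
        /*event_tracer=*/nullptr,
        /*method_name=*/"lora_forward");

method_name defaults to "forward" everywhere, so existing callers are
unchanged.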
Differential Revision: [D92225533](https://our.internmc.facebook.com/intern/diff/D92225533/)

[ghstack-poisoned]
---
 CLAUDE.md                                     |  9 ++++-
 examples/models/llama/main.cpp                | 11 ++++--
 examples/models/llama/runner/runner.cpp       | 15 +++++---
 examples/models/llama/runner/runner.h         |  6 ++--
 extension/llm/runner/llm_runner_helper.cpp    | 25 ++++++++-----
 extension/llm/runner/llm_runner_helper.h      |  9 +++--
 .../runner/test/test_text_decoder_runner.cpp  | 35 +++++++++++++++++++
 extension/llm/runner/text_decoder_runner.cpp  | 27 ++++++++------
 extension/llm/runner/text_decoder_runner.h    | 25 +++++++++++--
 9 files changed, 128 insertions(+), 34 deletions(-)

diff --git a/CLAUDE.md b/CLAUDE.md
index 5cb0f55e786..dcb327696e2 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -1,9 +1,16 @@
 # Repo and framework name
 
-Refer to the repo/framework/runtime "executorch" (in lower cases) or "ExecuTorch" (in 
+Refer to the repo/framework/runtime "executorch" (in lower cases) or "ExecuTorch" (in
 camel cases), not "ExecutorTorch". With limited code or comment length, maybe refer to
 the framework "ET" but consider it as very unofficial and not recommended.
 
+# fbcode vs xplat (internal builds only)
+
+When building internally under fbsource, only edit files in `fbcode/executorch/`.
+The `xplat/executorch/` directory is automatically mirrored from fbcode.
+
+This does not apply to OSS builds (i.e., the standalone executorch repository).
+
 # Commit messages
 
 Don't commit unless the user explicitly asks you to.
diff --git a/examples/models/llama/main.cpp b/examples/models/llama/main.cpp
index 3de47598426..80ece46a1bb 100644
--- a/examples/models/llama/main.cpp
+++ b/examples/models/llama/main.cpp
@@ -77,6 +77,11 @@ DEFINE_string(
     "etdump.in",
     "If an etdump path is provided, generate an ETDump file at the specified path for profiling purposes.");
 
+DEFINE_string(
+    method_name,
+    "forward",
+    "Method name to execute in the model (e.g., 'forward', 'lora_forward').");
+
 // Helper function to parse comma-separated string lists
 std::vector<std::string> parseStringList(const std::string& input) {
   std::vector<std::string> result;
@@ -145,11 +150,11 @@ int32_t main(int32_t argc, char** argv) {
       data_paths,
       temperature,
 #ifdef ET_EVENT_TRACER_ENABLED
-      std::move(etdump_gen_ptr)
+      std::move(etdump_gen_ptr),
 #else
-      nullptr
+      nullptr,
 #endif
-      );
+      FLAGS_method_name);
 
   if (runner == nullptr) {
     ET_LOG(Error, "Failed to create llama runner");
diff --git a/examples/models/llama/runner/runner.cpp b/examples/models/llama/runner/runner.cpp
index d2db805405e..3e26e5334e3 100644
--- a/examples/models/llama/runner/runner.cpp
+++ b/examples/models/llama/runner/runner.cpp
@@ -37,7 +37,8 @@ std::unique_ptr<executorch::extension::llm::TextLLMRunner> create_llama_runner(
     const std::string& tokenizer_path,
     std::optional<const std::string> data_path,
     float temperature,
-    std::unique_ptr<::executorch::runtime::EventTracer> event_tracer) {
+    std::unique_ptr<::executorch::runtime::EventTracer> event_tracer,
+    const std::string& method_name) {
   if (data_path.has_value()) {
     std::vector<std::string> data_files;
     data_files.push_back(data_path.value());
@@ -46,14 +47,16 @@ std::unique_ptr<executorch::extension::llm::TextLLMRunner> create_llama_runner(
         tokenizer_path,
         std::move(data_files),
         temperature,
-        std::move(event_tracer));
+        std::move(event_tracer),
+        method_name);
   }
   return create_llama_runner(
       model_path,
       tokenizer_path,
       std::vector<std::string>(),
       temperature,
-      std::move(event_tracer));
+      std::move(event_tracer),
+      method_name);
 }
 
 std::unique_ptr<executorch::extension::llm::TextLLMRunner> create_llama_runner(
@@ -61,7 +64,8 @@
     const std::string& tokenizer_path,
     std::vector<std::string> data_files,
     float temperature,
-    std::unique_ptr<::executorch::runtime::EventTracer> event_tracer) {
+    std::unique_ptr<::executorch::runtime::EventTracer> event_tracer,
+    const std::string& method_name) {
   ET_LOG(
       Info,
       "Creating LLaMa runner: model_path=%s, tokenizer_path=%s",
@@ -84,7 +88,8 @@ std::unique_ptr<executorch::extension::llm::TextLLMRunner> create_llama_runner(
       std::move(tokenizer),
       data_files,
       temperature,
-      std::move(event_tracer));
+      std::move(event_tracer),
+      method_name);
 }
 
 } // namespace example
diff --git a/examples/models/llama/runner/runner.h b/examples/models/llama/runner/runner.h
index 10225fcb81d..00d0832908b 100644
--- a/examples/models/llama/runner/runner.h
+++ b/examples/models/llama/runner/runner.h
@@ -29,14 +29,16 @@ std::unique_ptr<executorch::extension::llm::TextLLMRunner> create_llama_runner(
     const std::string& tokenizer_path,
     std::optional<const std::string> data_path,
     float temperature = -1.0f,
-    std::unique_ptr<::executorch::runtime::EventTracer> event_tracer = nullptr);
+    std::unique_ptr<::executorch::runtime::EventTracer> event_tracer = nullptr,
+    const std::string& method_name = "forward");
 
 std::unique_ptr<executorch::extension::llm::TextLLMRunner> create_llama_runner(
     const std::string& model_path,
     const std::string& tokenizer_path,
     std::vector<std::string> data_files = {},
     float temperature = -1.0f,
-    std::unique_ptr<::executorch::runtime::EventTracer> event_tracer = nullptr);
+    std::unique_ptr<::executorch::runtime::EventTracer> event_tracer = nullptr,
+    const std::string& method_name = "forward");
 
 std::unique_ptr<tokenizers::Tokenizer> load_llama_tokenizer(
     const std::string& tokenizer_path,
diff --git a/extension/llm/runner/llm_runner_helper.cpp b/extension/llm/runner/llm_runner_helper.cpp
index 13f8d7a9db5..25846a2c5bc 100644
--- a/extension/llm/runner/llm_runner_helper.cpp
+++ b/extension/llm/runner/llm_runner_helper.cpp
@@ -182,18 +182,26 @@ std::unique_ptr<TextLLMRunner> create_text_llm_runner(
     const std::string& model_path,
     std::unique_ptr<::tokenizers::Tokenizer> tokenizer,
     std::optional<const std::string> data_path,
-    float temperature) {
+    float temperature,
+    const std::string& method_name) {
   if (data_path.has_value()) {
     std::vector<std::string> data_files;
     data_files.push_back(data_path.value());
     return create_text_llm_runner(
-        model_path, std::move(tokenizer), std::move(data_files), temperature);
+        model_path,
+        std::move(tokenizer),
+        std::move(data_files),
+        temperature,
+        nullptr,
+        method_name);
   }
   return create_text_llm_runner(
       model_path,
       std::move(tokenizer),
       std::vector<std::string>(),
-      temperature);
+      temperature,
+      nullptr,
+      method_name);
 }
 
 std::unique_ptr<TextLLMRunner> create_text_llm_runner(
@@ -201,7 +209,8 @@
     std::unique_ptr<::tokenizers::Tokenizer> tokenizer,
     std::vector<std::string> data_files,
     float temperature,
-    std::unique_ptr<::executorch::runtime::EventTracer> event_tracer) {
+    std::unique_ptr<::executorch::runtime::EventTracer> event_tracer,
+    const std::string& method_name) {
   // Sanity check tokenizer
   if (!tokenizer || !tokenizer->is_loaded()) {
     ET_LOG(Error, "Tokenizer is null or not loaded");
@@ -236,10 +245,10 @@ std::unique_ptr<TextLLMRunner> create_text_llm_runner(
   // Create IOManager
   std::unique_ptr<IOManager> io_manager = std::make_unique<IOManager>(*module);
 
-  // Create text_decoder_runner. Use a shared_ptr so that it can be shared with
-  // TextPrefiller and TextTokenGenerator
-  auto text_decoder_runner =
-      std::make_unique<TextDecoderRunner>(module.get(), io_manager.get());
+  // Create text_decoder_runner
+  ET_LOG(Info, "Using method: %s", method_name.c_str());
+  auto text_decoder_runner = std::make_unique<TextDecoderRunner>(
+      module.get(), io_manager.get(), method_name);
 
   // Create text_prefiller
   auto text_prefiller = std::make_unique<TextPrefiller>(
diff --git a/extension/llm/runner/llm_runner_helper.h b/extension/llm/runner/llm_runner_helper.h
index 424567b7c2b..373124d8560 100644
--- a/extension/llm/runner/llm_runner_helper.h
+++ b/extension/llm/runner/llm_runner_helper.h
@@ -95,6 +95,7 @@ ET_EXPERIMENTAL std::unordered_set<uint64_t> get_eos_ids(
  * @param data_path Optional path to additional data required by the model
  * @param temperature Optional temperature parameter for controlling randomness
  * (deprecated)
+ * @param method_name Name of the method to execute in the model
  * @return std::unique_ptr<TextLLMRunner> Initialized TextLLMRunner instance, or
  * nullptr on failure
  */
@@ -102,7 +103,8 @@ ET_EXPERIMENTAL std::unique_ptr<TextLLMRunner> create_text_llm_runner(
     const std::string& model_path,
     std::unique_ptr<::tokenizers::Tokenizer> tokenizer,
     std::optional<const std::string> data_path,
-    float temperature = -1.0f);
+    float temperature = -1.0f,
+    const std::string& method_name = "forward");
 
 /**
  * @brief Creates a TextLLMRunner instance with dependency injection
@@ -116,6 +118,8 @@
  * @param data_files Vector of paths to additional data required by the model
  * @param temperature Optional temperature parameter for controlling randomness
  * (deprecated)
+ * @param event_tracer Optional event tracer for profiling
+ * @param method_name Name of the method to execute in the model
 * @return std::unique_ptr<TextLLMRunner> Initialized TextLLMRunner instance, or
  * nullptr on failure
  */
@@ -124,7 +128,8 @@ ET_EXPERIMENTAL std::unique_ptr<TextLLMRunner> create_text_llm_runner(
     std::unique_ptr<::tokenizers::Tokenizer> tokenizer,
     std::vector<std::string> data_files = {},
     float temperature = -1.0f,
-    std::unique_ptr<::executorch::runtime::EventTracer> event_tracer = nullptr);
+    std::unique_ptr<::executorch::runtime::EventTracer> event_tracer = nullptr,
+    const std::string& method_name = "forward");
 
 /**
  * @brief Creates a MultimodalRunner instance with dependency injection
diff --git a/extension/llm/runner/test/test_text_decoder_runner.cpp b/extension/llm/runner/test/test_text_decoder_runner.cpp
index 0001509ec55..917467e31fd 100644
--- a/extension/llm/runner/test/test_text_decoder_runner.cpp
+++ b/extension/llm/runner/test/test_text_decoder_runner.cpp
@@ -47,6 +47,41 @@ class TextDecoderRunnerTest : public Test {
   std::unique_ptr<IOManager> io_manager_;
 };
 
+// Test that method_name defaults to "forward"
+TEST_F(TextDecoderRunnerTest, MethodNameDefaultsToForward) {
+  EXPECT_EQ(runner_->method_name(), "forward");
+}
+
+// Test that method_name can be set to a custom value
+TEST_F(TextDecoderRunnerTest, MethodNameCustomValue) {
+  auto custom_runner = std::make_unique<TextDecoderRunner>(
+      mock_module_.get(), io_manager_.get(), "encode");
+  EXPECT_EQ(custom_runner->method_name(), "encode");
+}
+
+// Test that load() uses method_name (not hardcoded "forward")
+TEST_F(TextDecoderRunnerTest, LoadUsesMethodName) {
+  // Get an available model
+  const char* model_path = std::getenv("KVCACHE_CACHE_POS");
+  if (!model_path) {
+    GTEST_SKIP() << "No PTE model environment variable set";
+  }
+  auto module = std::make_unique<Module>(model_path);
+  auto load_result = module->load();
+  if (load_result != Error::Ok) {
+    GTEST_SKIP() << "Failed to load model";
"Failed to load model"; + } + + auto io_mgr = std::make_unique(*module); + + // Create runner with a method name that doesn't exist + TextDecoderRunner runner(module.get(), io_mgr.get(), "nonexistent_method"); + + // load() should fail because "nonexistent_method" doesn't exist + auto result = runner.load(); + EXPECT_NE(result, Error::Ok); +} + // Test logits_to_token() method with Float tensor TEST_F(TextDecoderRunnerTest, LogitsToTokenFloat) { TensorFactory tf_float; diff --git a/extension/llm/runner/text_decoder_runner.cpp b/extension/llm/runner/text_decoder_runner.cpp index 8d51736ace5..5a2d68c1514 100644 --- a/extension/llm/runner/text_decoder_runner.cpp +++ b/extension/llm/runner/text_decoder_runner.cpp @@ -22,8 +22,13 @@ namespace llm { // NOTE: we observed ~2x loading performance increase on iPhone 15 // and a ~5% improvement on Galaxy S22 by switching to // FileDataLoader instead of MmapDataLoader + UseMlockIgnoreErrors. -TextDecoderRunner::TextDecoderRunner(Module* module, IOManager* io_manager) - : module_(module), io_manager_(io_manager) {} +TextDecoderRunner::TextDecoderRunner( + Module* module, + IOManager* io_manager, + std::string method_name) + : module_(module), + io_manager_(io_manager), + method_name_(std::move(method_name)) {} // This function is functional, meaning it shouldn't modify any state of the // input. It should be safe to call multiple times with the same inputs. The @@ -32,7 +37,7 @@ ::executorch::runtime::Result TextDecoderRunner::step( TensorPtr& tokens, int64_t start_pos) { // ET_LOG(Info, "Input token %" PRIu64, input_token); - auto method_meta_result = module_->method_meta("forward"); + auto method_meta_result = module_->method_meta(method_name_); if (!method_meta_result.ok()) { return method_meta_result.error(); } @@ -44,25 +49,26 @@ ::executorch::runtime::Result TextDecoderRunner::step( if (use_kv_cache) { auto start_pos_tensor_result = populate_start_pos_or_cache_position( - module_, start_pos, cache_positions, tokens->numel(), "forward"); + module_, start_pos, cache_positions, tokens->numel(), method_name_.c_str()); if (!start_pos_tensor_result.ok()) { return start_pos_tensor_result.error(); } auto start_pos_tensor = std::move(*start_pos_tensor_result); std::vector inputs; - auto inputs_res = io_manager_->prepare_decode(tokens, start_pos_tensor); + auto inputs_res = + io_manager_->prepare_decode(tokens, start_pos_tensor, method_name_); ET_CHECK_OK_OR_RETURN_ERROR(inputs_res.error()); inputs = inputs_res.get(); - auto outputs_res = module_->forward(inputs); + auto outputs_res = module_->execute(method_name_, inputs); ET_CHECK_OK_OR_RETURN_ERROR(outputs_res.error()); - auto update_err = io_manager_->update_decode(outputs_res.get()); + auto update_err = io_manager_->update_decode(outputs_res.get(), method_name_); ET_CHECK_OK_OR_RETURN_ERROR(update_err); ET_CHECK_MSG( outputs_res.get().size() == 1, - "More then one output returned from executing LLM."); + "More than one output returned from executing LLM."); ET_CHECK_MSG( outputs_res.get()[0].isTensor(), "Non Tensor Output returned from executing LLM"); @@ -72,11 +78,12 @@ ::executorch::runtime::Result TextDecoderRunner::step( } else { // no kv cache (void)start_pos; // unused - auto outputs_res = module_->forward(tokens); + std::vector inputs{tokens}; + auto outputs_res = module_->execute(method_name_, inputs); ET_CHECK_OK_OR_RETURN_ERROR(outputs_res.error()); ET_CHECK_MSG( outputs_res.get().size() == 1, - "More then one output returned from executing LLM."); + "More than one output returned from 
     ET_CHECK_MSG(
         outputs_res.get()[0].isTensor(),
         "Non Tensor Output returned from executing LLM");
diff --git a/extension/llm/runner/text_decoder_runner.h b/extension/llm/runner/text_decoder_runner.h
index 720000185c9..8b855e2924f 100644
--- a/extension/llm/runner/text_decoder_runner.h
+++ b/extension/llm/runner/text_decoder_runner.h
@@ -20,7 +20,10 @@ namespace llm {
 
 class ET_EXPERIMENTAL TextDecoderRunner {
  public:
-  explicit TextDecoderRunner(Module* module, IOManager* io_manager);
+  explicit TextDecoderRunner(
+      Module* module,
+      IOManager* io_manager,
+      std::string method_name = "forward");
 
   virtual ~TextDecoderRunner() = default;
 
@@ -40,7 +43,14 @@
    * @return The error code.
    */
   virtual ::executorch::runtime::Error load() {
-    return module_->load_method("forward");
+    auto err = module_->load_method(method_name_);
+    if (err != ::executorch::runtime::Error::Ok) {
+      ET_LOG(
+          Error,
+          "Failed to load method '%s'. Check available methods in the model.",
+          method_name_.c_str());
+    }
+    return err;
   }
 
   /**
@@ -48,7 +58,15 @@
    * @return True if the Module is loaded, false otherwise.
    */
   virtual bool is_method_loaded() {
-    return module_->is_method_loaded("forward");
+    return module_->is_method_loaded(method_name_);
+  }
+
+  /**
+   * Get the method name used by this runner.
+   * @return The method name.
+   */
+  const std::string& method_name() const {
+    return method_name_;
   }
 
   inline void stop() {
@@ -79,6 +97,7 @@
    */
   Module* module_;
   IOManager* io_manager_;
+  std::string method_name_;
   bool should_stop_{false};
 };
 