From 8481fd59d438f0a40abe87c65dec5732ae2a6b93 Mon Sep 17 00:00:00 2001
From: Github Executorch
Date: Wed, 4 Feb 2026 18:22:06 -0800
Subject: [PATCH] Support multimethod in runner

Allow specifying which method the runner executes instead of always
calling the hardcoded "forward" method.

Also update CLAUDE.md with fbcode/xplat guidance.
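Example usage (illustrative only; assumes a model exported with an
additional "lora_forward" method, and the llama example binary built as
llama_main):

    # CLI: select a non-default method via the new flag
    llama_main --model_path=llama.pte --tokenizer_path=tokenizer.model \
        --method_name=lora_forward

    // C++: the new trailing parameter on the factory functions
    auto runner = example::create_llama_runner(
        "llama.pte",
        "tokenizer.model",
        /*data_files=*/{},
        /*temperature=*/-1.0f,
        /*event_tracer=*/nullptr,
        /*method_name=*/"lora_forward");

method_name defaults to "forward" everywhere, so existing callers are
unchanged.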
Differential Revision: [D92225533](https://our.internmc.facebook.com/intern/diff/D92225533/)

[ghstack-poisoned]
---
 CLAUDE.md                                     |  9 ++++-
 examples/models/llama/main.cpp                | 11 ++++--
 examples/models/llama/runner/runner.cpp       | 15 +++++---
 examples/models/llama/runner/runner.h         |  6 ++--
 extension/llm/runner/llm_runner_helper.cpp    | 25 ++++++++-----
 extension/llm/runner/llm_runner_helper.h      |  9 +++--
 .../runner/test/test_text_decoder_runner.cpp  | 35 +++++++++++++++++++
 extension/llm/runner/text_decoder_runner.cpp  | 27 ++++++++------
 extension/llm/runner/text_decoder_runner.h    | 25 +++++++++++--
 9 files changed, 128 insertions(+), 34 deletions(-)

diff --git a/CLAUDE.md b/CLAUDE.md
index 5cb0f55e786..dcb327696e2 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -1,9 +1,16 @@
 # Repo and framework name
 
-Refer to the repo/framework/runtime "executorch" (in lower cases) or "ExecuTorch" (in 
+Refer to the repo/framework/runtime "executorch" (in lower cases) or "ExecuTorch" (in
 camel cases), not "ExecutorTorch". With limited code or comment length, maybe refer to
 the framework "ET" but consider it as very unofficial and not recommended.
 
+# fbcode vs xplat (internal builds only)
+
+When building internally under fbsource, only edit files in `fbcode/executorch/`.
+The `xplat/executorch/` directory is automatically mirrored from fbcode.
+
+This does not apply to OSS builds (i.e., the standalone executorch repository).
+
 # Commit messages
 
 Don't commit unless the user explicitly asks you to.
diff --git a/examples/models/llama/main.cpp b/examples/models/llama/main.cpp
index 3de47598426..80ece46a1bb 100644
--- a/examples/models/llama/main.cpp
+++ b/examples/models/llama/main.cpp
@@ -77,6 +77,11 @@ DEFINE_string(
     "etdump.in",
     "If an etdump path is provided, generate an ETDump file at the specified path for profiling purposes.");
 
+DEFINE_string(
+    method_name,
+    "forward",
+    "Method name to execute in the model (e.g., 'forward', 'lora_forward').");
+
 // Helper function to parse comma-separated string lists
 std::vector<std::string> parseStringList(const std::string& input) {
   std::vector<std::string> result;
@@ -145,11 +150,11 @@ int32_t main(int32_t argc, char** argv) {
       data_paths,
       temperature,
 #ifdef ET_EVENT_TRACER_ENABLED
-      std::move(etdump_gen_ptr)
+      std::move(etdump_gen_ptr),
 #else
-      nullptr
+      nullptr,
 #endif
-      );
+      FLAGS_method_name);
 
   if (runner == nullptr) {
     ET_LOG(Error, "Failed to create llama runner");
diff --git a/examples/models/llama/runner/runner.cpp b/examples/models/llama/runner/runner.cpp
index d2db805405e..3e26e5334e3 100644
--- a/examples/models/llama/runner/runner.cpp
+++ b/examples/models/llama/runner/runner.cpp
@@ -37,7 +37,8 @@ std::unique_ptr<executorch::extension::llm::TextLLMRunner> create_llama_runner(
     const std::string& tokenizer_path,
     std::optional<const std::string> data_path,
     float temperature,
-    std::unique_ptr<::executorch::runtime::EventTracer> event_tracer) {
+    std::unique_ptr<::executorch::runtime::EventTracer> event_tracer,
+    const std::string& method_name) {
   if (data_path.has_value()) {
     std::vector<std::string> data_files;
     data_files.push_back(data_path.value());
@@ -46,14 +47,16 @@ std::unique_ptr<executorch::extension::llm::TextLLMRunner> create_llama_runner(
         tokenizer_path,
         std::move(data_files),
         temperature,
-        std::move(event_tracer));
+        std::move(event_tracer),
+        method_name);
   }
   return create_llama_runner(
       model_path,
       tokenizer_path,
       std::vector<std::string>(),
       temperature,
-      std::move(event_tracer));
+      std::move(event_tracer),
+      method_name);
 }
 
 std::unique_ptr<executorch::extension::llm::TextLLMRunner> create_llama_runner(
@@ -61,7 +64,8 @@
     const std::string& tokenizer_path,
     std::vector<std::string> data_files,
     float temperature,
-    std::unique_ptr<::executorch::runtime::EventTracer> event_tracer) {
+    std::unique_ptr<::executorch::runtime::EventTracer> event_tracer,
+    const std::string& method_name) {
   ET_LOG(
       Info,
       "Creating LLaMa runner: model_path=%s, tokenizer_path=%s",
@@ -84,7 +88,8 @@ std::unique_ptr<executorch::extension::llm::TextLLMRunner> create_llama_runner(
       std::move(tokenizer),
       data_files,
       temperature,
-      std::move(event_tracer));
+      std::move(event_tracer),
+      method_name);
 }
 
 } // namespace example
diff --git a/examples/models/llama/runner/runner.h b/examples/models/llama/runner/runner.h
index 10225fcb81d..00d0832908b 100644
--- a/examples/models/llama/runner/runner.h
+++ b/examples/models/llama/runner/runner.h
@@ -29,14 +29,16 @@ std::unique_ptr<executorch::extension::llm::TextLLMRunner> create_llama_runner(
     const std::string& tokenizer_path,
     std::optional<const std::string> data_path,
     float temperature = -1.0f,
-    std::unique_ptr<::executorch::runtime::EventTracer> event_tracer = nullptr);
+    std::unique_ptr<::executorch::runtime::EventTracer> event_tracer = nullptr,
+    const std::string& method_name = "forward");
 
 std::unique_ptr<executorch::extension::llm::TextLLMRunner> create_llama_runner(
     const std::string& model_path,
     const std::string& tokenizer_path,
     std::vector<std::string> data_files = {},
     float temperature = -1.0f,
-    std::unique_ptr<::executorch::runtime::EventTracer> event_tracer = nullptr);
+    std::unique_ptr<::executorch::runtime::EventTracer> event_tracer = nullptr,
+    const std::string& method_name = "forward");
 
 std::unique_ptr<tokenizers::Tokenizer> load_llama_tokenizer(
     const std::string& tokenizer_path,
diff --git a/extension/llm/runner/llm_runner_helper.cpp b/extension/llm/runner/llm_runner_helper.cpp
index 13f8d7a9db5..25846a2c5bc 100644
--- a/extension/llm/runner/llm_runner_helper.cpp
+++ b/extension/llm/runner/llm_runner_helper.cpp
@@ -182,18 +182,26 @@ std::unique_ptr<TextLLMRunner> create_text_llm_runner(
     const std::string& model_path,
     std::unique_ptr<::tokenizers::Tokenizer> tokenizer,
     std::optional<const std::string> data_path,
-    float temperature) {
+    float temperature,
+    const std::string& method_name) {
   if (data_path.has_value()) {
     std::vector<std::string> data_files;
     data_files.push_back(data_path.value());
     return create_text_llm_runner(
-        model_path, std::move(tokenizer), std::move(data_files), temperature);
+        model_path,
+        std::move(tokenizer),
+        std::move(data_files),
+        temperature,
+        nullptr,
+        method_name);
   }
   return create_text_llm_runner(
       model_path,
       std::move(tokenizer),
       std::vector<std::string>(),
-      temperature);
+      temperature,
+      nullptr,
+      method_name);
 }
 
 std::unique_ptr<TextLLMRunner> create_text_llm_runner(
@@ -201,7 +209,8 @@
     std::unique_ptr<::tokenizers::Tokenizer> tokenizer,
     std::vector<std::string> data_files,
     float temperature,
-    std::unique_ptr<::executorch::runtime::EventTracer> event_tracer) {
+    std::unique_ptr<::executorch::runtime::EventTracer> event_tracer,
+    const std::string& method_name) {
   // Sanity check tokenizer
   if (!tokenizer || !tokenizer->is_loaded()) {
     ET_LOG(Error, "Tokenizer is null or not loaded");
@@ -236,10 +245,10 @@ std::unique_ptr<TextLLMRunner> create_text_llm_runner(
   // Create IOManager
   std::unique_ptr<IOManager> io_manager = std::make_unique<IOManager>(*module);
 
-  // Create text_decoder_runner. Use a shared_ptr so that it can be shared with
-  // TextPrefiller and TextTokenGenerator
-  auto text_decoder_runner =
-      std::make_unique<TextDecoderRunner>(module.get(), io_manager.get());
+  // Create text_decoder_runner
+  ET_LOG(Info, "Using method: %s", method_name.c_str());
+  auto text_decoder_runner = std::make_unique<TextDecoderRunner>(
+      module.get(), io_manager.get(), method_name);
 
   // Create text_prefiller
   auto text_prefiller = std::make_unique<TextPrefiller>(
diff --git a/extension/llm/runner/llm_runner_helper.h b/extension/llm/runner/llm_runner_helper.h
index 424567b7c2b..373124d8560 100644
--- a/extension/llm/runner/llm_runner_helper.h
+++ b/extension/llm/runner/llm_runner_helper.h
@@ -95,6 +95,7 @@ ET_EXPERIMENTAL std::unordered_set<uint64_t> get_eos_ids(
  * @param data_path Optional path to additional data required by the model
  * @param temperature Optional temperature parameter for controlling randomness
  * (deprecated)
+ * @param method_name Name of the method to execute in the model
  * @return std::unique_ptr<TextLLMRunner> Initialized TextLLMRunner instance, or
  * nullptr on failure
  */
@@ -102,7 +103,8 @@ ET_EXPERIMENTAL std::unique_ptr<TextLLMRunner> create_text_llm_runner(
     const std::string& model_path,
     std::unique_ptr<::tokenizers::Tokenizer> tokenizer,
     std::optional<const std::string> data_path,
-    float temperature = -1.0f);
+    float temperature = -1.0f,
+    const std::string& method_name = "forward");
 
 /**
  * @brief Creates a TextLLMRunner instance with dependency injection
@@ -116,6 +118,8 @@
  * @param data_files Vector of paths to additional data required by the model
  * @param temperature Optional temperature parameter for controlling randomness
  * (deprecated)
+ * @param event_tracer Optional event tracer for profiling
+ * @param method_name Name of the method to execute in the model
 * @return std::unique_ptr<TextLLMRunner> Initialized TextLLMRunner instance, or
  * nullptr on failure
  */
@@ -124,7 +128,8 @@ ET_EXPERIMENTAL std::unique_ptr<TextLLMRunner> create_text_llm_runner(
     std::unique_ptr<::tokenizers::Tokenizer> tokenizer,
     std::vector<std::string> data_files = {},
     float temperature = -1.0f,
-    std::unique_ptr<::executorch::runtime::EventTracer> event_tracer = nullptr);
+    std::unique_ptr<::executorch::runtime::EventTracer> event_tracer = nullptr,
+    const std::string& method_name = "forward");
 
 /**
  * @brief Creates a MultimodalRunner instance with dependency injection
diff --git a/extension/llm/runner/test/test_text_decoder_runner.cpp b/extension/llm/runner/test/test_text_decoder_runner.cpp
index 0001509ec55..917467e31fd 100644
--- a/extension/llm/runner/test/test_text_decoder_runner.cpp
+++ b/extension/llm/runner/test/test_text_decoder_runner.cpp
@@ -47,6 +47,41 @@ class TextDecoderRunnerTest : public Test {
   std::unique_ptr<IOManager> io_manager_;
 };
 
+// Test that method_name defaults to "forward"
+TEST_F(TextDecoderRunnerTest, MethodNameDefaultsToForward) {
+  EXPECT_EQ(runner_->method_name(), "forward");
+}
+
+// Test that method_name can be set to a custom value
+TEST_F(TextDecoderRunnerTest, MethodNameCustomValue) {
+  auto custom_runner = std::make_unique<TextDecoderRunner>(
+      mock_module_.get(), io_manager_.get(), "encode");
+  EXPECT_EQ(custom_runner->method_name(), "encode");
+}
+
+// Test that load() uses method_name (not hardcoded "forward")
+TEST_F(TextDecoderRunnerTest, LoadUsesMethodName) {
+  // Get an available model
+  const char* model_path = std::getenv("KVCACHE_CACHE_POS");
+  if (!model_path) {
+    GTEST_SKIP() << "No PTE model environment variable set";
+  }
+  auto module = std::make_unique<Module>(model_path);
+  auto load_result = module->load();
+  if (load_result != Error::Ok) {
+    GTEST_SKIP() << "Failed to load model";
"Failed to load model"; + } + + auto io_mgr = std::make_unique(*module); + + // Create runner with a method name that doesn't exist + TextDecoderRunner runner(module.get(), io_mgr.get(), "nonexistent_method"); + + // load() should fail because "nonexistent_method" doesn't exist + auto result = runner.load(); + EXPECT_NE(result, Error::Ok); +} + // Test logits_to_token() method with Float tensor TEST_F(TextDecoderRunnerTest, LogitsToTokenFloat) { TensorFactory tf_float; diff --git a/extension/llm/runner/text_decoder_runner.cpp b/extension/llm/runner/text_decoder_runner.cpp index 8d51736ace5..5a2d68c1514 100644 --- a/extension/llm/runner/text_decoder_runner.cpp +++ b/extension/llm/runner/text_decoder_runner.cpp @@ -22,8 +22,13 @@ namespace llm { // NOTE: we observed ~2x loading performance increase on iPhone 15 // and a ~5% improvement on Galaxy S22 by switching to // FileDataLoader instead of MmapDataLoader + UseMlockIgnoreErrors. -TextDecoderRunner::TextDecoderRunner(Module* module, IOManager* io_manager) - : module_(module), io_manager_(io_manager) {} +TextDecoderRunner::TextDecoderRunner( + Module* module, + IOManager* io_manager, + std::string method_name) + : module_(module), + io_manager_(io_manager), + method_name_(std::move(method_name)) {} // This function is functional, meaning it shouldn't modify any state of the // input. It should be safe to call multiple times with the same inputs. The @@ -32,7 +37,7 @@ ::executorch::runtime::Result TextDecoderRunner::step( TensorPtr& tokens, int64_t start_pos) { // ET_LOG(Info, "Input token %" PRIu64, input_token); - auto method_meta_result = module_->method_meta("forward"); + auto method_meta_result = module_->method_meta(method_name_); if (!method_meta_result.ok()) { return method_meta_result.error(); } @@ -44,25 +49,26 @@ ::executorch::runtime::Result TextDecoderRunner::step( if (use_kv_cache) { auto start_pos_tensor_result = populate_start_pos_or_cache_position( - module_, start_pos, cache_positions, tokens->numel(), "forward"); + module_, start_pos, cache_positions, tokens->numel(), method_name_.c_str()); if (!start_pos_tensor_result.ok()) { return start_pos_tensor_result.error(); } auto start_pos_tensor = std::move(*start_pos_tensor_result); std::vector inputs; - auto inputs_res = io_manager_->prepare_decode(tokens, start_pos_tensor); + auto inputs_res = + io_manager_->prepare_decode(tokens, start_pos_tensor, method_name_); ET_CHECK_OK_OR_RETURN_ERROR(inputs_res.error()); inputs = inputs_res.get(); - auto outputs_res = module_->forward(inputs); + auto outputs_res = module_->execute(method_name_, inputs); ET_CHECK_OK_OR_RETURN_ERROR(outputs_res.error()); - auto update_err = io_manager_->update_decode(outputs_res.get()); + auto update_err = io_manager_->update_decode(outputs_res.get(), method_name_); ET_CHECK_OK_OR_RETURN_ERROR(update_err); ET_CHECK_MSG( outputs_res.get().size() == 1, - "More then one output returned from executing LLM."); + "More than one output returned from executing LLM."); ET_CHECK_MSG( outputs_res.get()[0].isTensor(), "Non Tensor Output returned from executing LLM"); @@ -72,11 +78,12 @@ ::executorch::runtime::Result TextDecoderRunner::step( } else { // no kv cache (void)start_pos; // unused - auto outputs_res = module_->forward(tokens); + std::vector inputs{tokens}; + auto outputs_res = module_->execute(method_name_, inputs); ET_CHECK_OK_OR_RETURN_ERROR(outputs_res.error()); ET_CHECK_MSG( outputs_res.get().size() == 1, - "More then one output returned from executing LLM."); + "More than one output returned from 
     ET_CHECK_MSG(
         outputs_res.get()[0].isTensor(),
         "Non Tensor Output returned from executing LLM");
diff --git a/extension/llm/runner/text_decoder_runner.h b/extension/llm/runner/text_decoder_runner.h
index 720000185c9..8b855e2924f 100644
--- a/extension/llm/runner/text_decoder_runner.h
+++ b/extension/llm/runner/text_decoder_runner.h
@@ -20,7 +20,10 @@ namespace llm {
 
 class ET_EXPERIMENTAL TextDecoderRunner {
  public:
-  explicit TextDecoderRunner(Module* module, IOManager* io_manager);
+  explicit TextDecoderRunner(
+      Module* module,
+      IOManager* io_manager,
+      std::string method_name = "forward");
 
   virtual ~TextDecoderRunner() = default;
 
@@ -40,7 +43,14 @@
    * @return The error code.
    */
   virtual ::executorch::runtime::Error load() {
-    return module_->load_method("forward");
+    auto err = module_->load_method(method_name_);
+    if (err != ::executorch::runtime::Error::Ok) {
+      ET_LOG(
+          Error,
+          "Failed to load method '%s'. Check available methods in the model.",
+          method_name_.c_str());
+    }
+    return err;
   }
 
   /**
@@ -48,7 +58,15 @@
    * @return True if the Module is loaded, false otherwise.
    */
   virtual bool is_method_loaded() {
-    return module_->is_method_loaded("forward");
+    return module_->is_method_loaded(method_name_);
+  }
+
+  /**
+   * Get the method name used by this runner.
+   * @return The method name.
+   */
+  const std::string& method_name() const {
+    return method_name_;
   }
 
   inline void stop() {
@@ -79,6 +97,7 @@
    */
   Module* module_;
   IOManager* io_manager_;
+  std::string method_name_;
   bool should_stop_{false};
 };
 