Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 9 additions & 2 deletions CLAUDE.md
Original file line number Diff line number Diff line change
@@ -1,17 +1,24 @@
# Repo and framework name

Refer to the repo/framework/runtime "executorch" (in lower cases) or "ExecuTorch" (in
Refer to the repo/framework/runtime "executorch" (in lower cases) or "ExecuTorch" (in
camel case), not "ExecutorTorch". When code or comment length is limited, you may refer
to the framework as "ET", but treat that as very unofficial and not recommended.

# fbcode vs xplat (internal builds only)

When building internally under fbsource, only edit files in `fbcode/executorch/`.
The `xplat/executorch/` directory is automatically mirrored from fbcode.

This does not apply to OSS builds (i.e., the standalone executorch repository).

# Install

## Python

If the user is mostly importing `executorch` module and experimenting with Ahead-Of-Time
export flow, installation means installing `executorch` python package.

Python virtual environment or conda environment is highly recommended for installing
Python virtual environment or conda environment is highly recommended for installing
executorch from source. Double check whether the user wants to use a virtual environment before
building from source.

Expand Down
11 changes: 8 additions & 3 deletions examples/models/llama/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,11 @@ DEFINE_string(
"etdump.in",
"If an etdump path is provided, generate an ETDump file at the specified path for profiling purposes.");

// Command-line flag selecting which exported method of the model to run.
// Lets a single binary drive models that export multiple entry points
// (e.g. a base "forward" and a "lora_forward") without rebuilding.
DEFINE_string(
method_name,
"forward",
"Method name to execute in the model (e.g., 'forward', 'lora_forward').");

// Helper function to parse comma-separated string lists
std::vector<std::string> parseStringList(const std::string& input) {
std::vector<std::string> result;
Expand Down Expand Up @@ -145,11 +150,11 @@ int32_t main(int32_t argc, char** argv) {
data_paths,
temperature,
#ifdef ET_EVENT_TRACER_ENABLED
std::move(etdump_gen_ptr)
std::move(etdump_gen_ptr),
#else
nullptr
nullptr,
#endif
);
FLAGS_method_name);

if (runner == nullptr) {
ET_LOG(Error, "Failed to create llama runner");
Expand Down
15 changes: 10 additions & 5 deletions examples/models/llama/runner/runner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,8 @@ std::unique_ptr<llm::TextLLMRunner> create_llama_runner(
const std::string& tokenizer_path,
std::optional<const std::string> data_path,
float temperature,
std::unique_ptr<::executorch::runtime::EventTracer> event_tracer) {
std::unique_ptr<::executorch::runtime::EventTracer> event_tracer,
const std::string& method_name) {
if (data_path.has_value()) {
std::vector<std::string> data_files;
data_files.push_back(data_path.value());
Expand All @@ -46,22 +47,25 @@ std::unique_ptr<llm::TextLLMRunner> create_llama_runner(
tokenizer_path,
std::move(data_files),
temperature,
std::move(event_tracer));
std::move(event_tracer),
method_name);
}
return create_llama_runner(
model_path,
tokenizer_path,
std::vector<std::string>(),
temperature,
std::move(event_tracer));
std::move(event_tracer),
method_name);
}

std::unique_ptr<llm::TextLLMRunner> create_llama_runner(
const std::string& model_path,
const std::string& tokenizer_path,
std::vector<std::string> data_files,
float temperature,
std::unique_ptr<::executorch::runtime::EventTracer> event_tracer) {
std::unique_ptr<::executorch::runtime::EventTracer> event_tracer,
const std::string& method_name) {
ET_LOG(
Info,
"Creating LLaMa runner: model_path=%s, tokenizer_path=%s",
Expand All @@ -84,7 +88,8 @@ std::unique_ptr<llm::TextLLMRunner> create_llama_runner(
std::move(tokenizer),
data_files,
temperature,
std::move(event_tracer));
std::move(event_tracer),
method_name);
}

} // namespace example
6 changes: 4 additions & 2 deletions examples/models/llama/runner/runner.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,14 +29,16 @@ std::unique_ptr<llm::TextLLMRunner> create_llama_runner(
const std::string& tokenizer_path,
std::optional<const std::string> data_path,
float temperature = -1.0f,
std::unique_ptr<::executorch::runtime::EventTracer> event_tracer = nullptr);
std::unique_ptr<::executorch::runtime::EventTracer> event_tracer = nullptr,
const std::string& method_name = "forward");

std::unique_ptr<llm::TextLLMRunner> create_llama_runner(
const std::string& model_path,
const std::string& tokenizer_path,
std::vector<std::string> data_files = {},
float temperature = -1.0f,
std::unique_ptr<::executorch::runtime::EventTracer> event_tracer = nullptr);
std::unique_ptr<::executorch::runtime::EventTracer> event_tracer = nullptr,
const std::string& method_name = "forward");

std::unique_ptr<tokenizers::Tokenizer> load_llama_tokenizer(
const std::string& tokenizer_path,
Expand Down
25 changes: 17 additions & 8 deletions extension/llm/runner/llm_runner_helper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -182,26 +182,35 @@ std::unique_ptr<TextLLMRunner> create_text_llm_runner(
const std::string& model_path,
std::unique_ptr<::tokenizers::Tokenizer> tokenizer,
std::optional<const std::string> data_path,
float temperature) {
float temperature,
const std::string& method_name) {
if (data_path.has_value()) {
std::vector<std::string> data_files;
data_files.push_back(data_path.value());
return create_text_llm_runner(
model_path, std::move(tokenizer), std::move(data_files), temperature);
model_path,
std::move(tokenizer),
std::move(data_files),
temperature,
nullptr,
method_name);
}
return create_text_llm_runner(
model_path,
std::move(tokenizer),
std::vector<std::string>(),
temperature);
temperature,
nullptr,
method_name);
}

std::unique_ptr<TextLLMRunner> create_text_llm_runner(
const std::string& model_path,
std::unique_ptr<::tokenizers::Tokenizer> tokenizer,
std::vector<std::string> data_files,
float temperature,
std::unique_ptr<::executorch::runtime::EventTracer> event_tracer) {
std::unique_ptr<::executorch::runtime::EventTracer> event_tracer,
const std::string& method_name) {
// Sanity check tokenizer
if (!tokenizer || !tokenizer->is_loaded()) {
ET_LOG(Error, "Tokenizer is null or not loaded");
Expand Down Expand Up @@ -236,10 +245,10 @@ std::unique_ptr<TextLLMRunner> create_text_llm_runner(
// Create IOManager
std::unique_ptr<IOManager> io_manager = std::make_unique<IOManager>(*module);

// Create text_decoder_runner. Use a shared_ptr so that it can be shared with
// TextPrefiller and TextTokenGenerator
auto text_decoder_runner =
std::make_unique<TextDecoderRunner>(module.get(), io_manager.get());
// Create text_decoder_runner
ET_LOG(Info, "Using method: %s", method_name.c_str());
auto text_decoder_runner = std::make_unique<TextDecoderRunner>(
module.get(), io_manager.get(), method_name);

// Create text_prefiller
auto text_prefiller = std::make_unique<TextPrefiller>(
Expand Down
9 changes: 7 additions & 2 deletions extension/llm/runner/llm_runner_helper.h
Original file line number Diff line number Diff line change
Expand Up @@ -95,14 +95,16 @@ ET_EXPERIMENTAL std::unordered_set<uint64_t> get_eos_ids(
* @param data_path Optional path to additional data required by the model
* @param temperature Optional temperature parameter for controlling randomness
* (deprecated)
* @param method_name Name of the method to execute in the model
* @return std::unique_ptr<TextLLMRunner> Initialized TextLLMRunner instance, or
* nullptr on failure
*/
ET_EXPERIMENTAL std::unique_ptr<TextLLMRunner> create_text_llm_runner(
const std::string& model_path,
std::unique_ptr<::tokenizers::Tokenizer> tokenizer,
std::optional<const std::string> data_path,
float temperature = -1.0f);
float temperature = -1.0f,
const std::string& method_name = "forward");

/**
* @brief Creates a TextLLMRunner instance with dependency injection
Expand All @@ -116,6 +118,8 @@ ET_EXPERIMENTAL std::unique_ptr<TextLLMRunner> create_text_llm_runner(
* @param data_files Vector of paths to additional data required by the model
* @param temperature Optional temperature parameter for controlling randomness
* (deprecated)
* @param event_tracer Optional event tracer for profiling
* @param method_name Name of the method to execute in the model
* @return std::unique_ptr<TextLLMRunner> Initialized TextLLMRunner instance, or
* nullptr on failure
*/
Expand All @@ -124,7 +128,8 @@ ET_EXPERIMENTAL std::unique_ptr<TextLLMRunner> create_text_llm_runner(
std::unique_ptr<::tokenizers::Tokenizer> tokenizer,
std::vector<std::string> data_files = {},
float temperature = -1.0f,
std::unique_ptr<::executorch::runtime::EventTracer> event_tracer = nullptr);
std::unique_ptr<::executorch::runtime::EventTracer> event_tracer = nullptr,
const std::string& method_name = "forward");

/**
* @brief Creates a MultimodalRunner instance with dependency injection
Expand Down
35 changes: 35 additions & 0 deletions extension/llm/runner/test/test_text_decoder_runner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,41 @@ class TextDecoderRunnerTest : public Test {
std::unique_ptr<IOManager> io_manager_;
};

// Test that method_name defaults to "forward".
// runner_ is built by the fixture without an explicit method name, so this
// pins the backward-compatible default of the new constructor parameter.
TEST_F(TextDecoderRunnerTest, MethodNameDefaultsToForward) {
EXPECT_EQ(runner_->method_name(), "forward");
}

// Test that method_name can be set to a custom value via the constructor
// and is reported back unchanged by the method_name() accessor.
TEST_F(TextDecoderRunnerTest, MethodNameCustomValue) {
auto custom_runner = std::make_unique<TextDecoderRunner>(
mock_module_.get(), io_manager_.get(), "encode");
EXPECT_EQ(custom_runner->method_name(), "encode");
}

// Test that load() uses method_name (not hardcoded "forward").
// Loads a real PTE model, then constructs a runner pointing at a method that
// does not exist in it; load() must fail, proving the configured name is the
// one actually looked up.
TEST_F(TextDecoderRunnerTest, LoadUsesMethodName) {
// Get an available model
// NOTE(review): assumes KVCACHE_CACHE_POS holds a path to a test .pte file
// provided by the test environment — confirm against the CI setup.
const char* model_path = std::getenv("KVCACHE_CACHE_POS");
if (!model_path) {
GTEST_SKIP() << "No PTE model environment variable set";
}
auto module = std::make_unique<Module>(model_path);
auto load_result = module->load();
if (load_result != Error::Ok) {
GTEST_SKIP() << "Failed to load model";
}

auto io_mgr = std::make_unique<IOManager>(*module);

// Create runner with a method name that doesn't exist
TextDecoderRunner runner(module.get(), io_mgr.get(), "nonexistent_method");

// load() should fail because "nonexistent_method" doesn't exist
auto result = runner.load();
EXPECT_NE(result, Error::Ok);
}

// Test logits_to_token() method with Float tensor
TEST_F(TextDecoderRunnerTest, LogitsToTokenFloat) {
TensorFactory<executorch::aten::ScalarType::Float> tf_float;
Expand Down
27 changes: 17 additions & 10 deletions extension/llm/runner/text_decoder_runner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,13 @@ namespace llm {
// NOTE: we observed ~2x loading performance increase on iPhone 15
// and a ~5% improvement on Galaxy S22 by switching to
// FileDataLoader instead of MmapDataLoader + UseMlockIgnoreErrors.
// Constructor.
// @param module Non-owning pointer to the loaded Module to execute.
// @param io_manager Non-owning pointer to the IOManager handling I/O tensors.
// @param method_name Name of the exported method to run; defaults to
//        "forward" in the header, so existing two-argument call sites keep
//        working unchanged. Taken by value and moved into the member.
// The stale pre-change two-argument definition has been removed: keeping it
// alongside this overload duplicates the constructor logic and no longer
// matches the single declaration in the header.
TextDecoderRunner::TextDecoderRunner(
Module* module,
IOManager* io_manager,
std::string method_name)
: module_(module),
io_manager_(io_manager),
method_name_(std::move(method_name)) {}

// This function is functional, meaning it shouldn't modify any state of the
// input. It should be safe to call multiple times with the same inputs. The
Expand All @@ -32,7 +37,7 @@ ::executorch::runtime::Result<executorch::aten::Tensor> TextDecoderRunner::step(
TensorPtr& tokens,
int64_t start_pos) {
// ET_LOG(Info, "Input token %" PRIu64, input_token);
auto method_meta_result = module_->method_meta("forward");
auto method_meta_result = module_->method_meta(method_name_);
if (!method_meta_result.ok()) {
return method_meta_result.error();
}
Expand All @@ -44,25 +49,26 @@ ::executorch::runtime::Result<executorch::aten::Tensor> TextDecoderRunner::step(

if (use_kv_cache) {
auto start_pos_tensor_result = populate_start_pos_or_cache_position(
module_, start_pos, cache_positions, tokens->numel(), "forward");
module_, start_pos, cache_positions, tokens->numel(), method_name_.c_str());
if (!start_pos_tensor_result.ok()) {
return start_pos_tensor_result.error();
}
auto start_pos_tensor = std::move(*start_pos_tensor_result);

std::vector<runtime::EValue> inputs;
auto inputs_res = io_manager_->prepare_decode(tokens, start_pos_tensor);
auto inputs_res =
io_manager_->prepare_decode(tokens, start_pos_tensor, method_name_);
ET_CHECK_OK_OR_RETURN_ERROR(inputs_res.error());
inputs = inputs_res.get();
auto outputs_res = module_->forward(inputs);
auto outputs_res = module_->execute(method_name_, inputs);
ET_CHECK_OK_OR_RETURN_ERROR(outputs_res.error());

auto update_err = io_manager_->update_decode(outputs_res.get());
auto update_err = io_manager_->update_decode(outputs_res.get(), method_name_);
ET_CHECK_OK_OR_RETURN_ERROR(update_err);

ET_CHECK_MSG(
outputs_res.get().size() == 1,
"More then one output returned from executing LLM.");
"More than one output returned from executing LLM.");
ET_CHECK_MSG(
outputs_res.get()[0].isTensor(),
"Non Tensor Output returned from executing LLM");
Expand All @@ -72,11 +78,12 @@ ::executorch::runtime::Result<executorch::aten::Tensor> TextDecoderRunner::step(
} else { // no kv cache
(void)start_pos; // unused

auto outputs_res = module_->forward(tokens);
std::vector<runtime::EValue> inputs{tokens};
auto outputs_res = module_->execute(method_name_, inputs);
ET_CHECK_OK_OR_RETURN_ERROR(outputs_res.error());
ET_CHECK_MSG(
outputs_res.get().size() == 1,
"More then one output returned from executing LLM.");
"More than one output returned from executing LLM.");
ET_CHECK_MSG(
outputs_res.get()[0].isTensor(),
"Non Tensor Output returned from executing LLM");
Expand Down
25 changes: 22 additions & 3 deletions extension/llm/runner/text_decoder_runner.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,10 @@ namespace llm {

class ET_EXPERIMENTAL TextDecoderRunner {
public:
explicit TextDecoderRunner(Module* module, IOManager* io_manager);
explicit TextDecoderRunner(
Module* module,
IOManager* io_manager,
std::string method_name = "forward");

virtual ~TextDecoderRunner() = default;

Expand All @@ -40,15 +43,30 @@ class ET_EXPERIMENTAL TextDecoderRunner {
* @return The error code.
*/
// Loads the configured method on the underlying Module.
// Logs an error naming the missing method on failure so users of custom
// method names (e.g. "lora_forward") get an actionable message.
// @return Error::Ok on success, otherwise the Module's load error.
// The stale `return module_->load_method("forward");` line left over from
// the pre-change version has been removed: it made every statement after it
// unreachable and ignored method_name_ entirely.
virtual ::executorch::runtime::Error load() {
  auto err = module_->load_method(method_name_);
  if (err != ::executorch::runtime::Error::Ok) {
    ET_LOG(
        Error,
        "Failed to load method '%s'. Check available methods in the model.",
        method_name_.c_str());
  }
  return err;
}

/**
 * Check if the configured method in the Module is loaded.
 * @return True if the method named by method_name_ is loaded, false
 *         otherwise.
 */
// The stale `return module_->is_method_loaded("forward");` line from the
// pre-change version has been removed: it made the method_name_-aware
// return below unreachable and ignored custom method names.
virtual bool is_method_loaded() {
  return module_->is_method_loaded(method_name_);
}

/**
 * Get the method name used by this runner.
 * @return Const reference to the stored method name (e.g. "forward");
 *         valid for the lifetime of this runner.
 */
const std::string& method_name() const {
return method_name_;
}

inline void stop() {
Expand Down Expand Up @@ -79,6 +97,7 @@ class ET_EXPERIMENTAL TextDecoderRunner {
*/
Module* module_;
IOManager* io_manager_;
std::string method_name_;
bool should_stop_{false};
};

Expand Down
Loading