Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 9 additions & 2 deletions CLAUDE.md
Original file line number Diff line number Diff line change
@@ -1,17 +1,24 @@
# Repo and framework name

Refer to the repo/framework/runtime "executorch" (in lower cases) or "ExecuTorch" (in
Refer to the repo/framework/runtime "executorch" (in lower cases) or "ExecuTorch" (in
camel case), not "ExecutorTorch". When code or comment length is limited, you may refer
to the framework as "ET", but treat that as very unofficial and not recommended.

# fbcode vs xplat (internal builds only)

When building internally under fbsource, only edit files in `fbcode/executorch/`.
The `xplat/executorch/` directory is automatically mirrored from fbcode.

This does not apply to OSS builds (i.e., the standalone executorch repository).

# Install

## Python

If the user is mostly importing `executorch` module and experimenting with Ahead-Of-Time
export flow, installation means installing `executorch` python package.

Python virtual environment or conda environment is highly recommended for installing
Python virtual environment or conda environment is highly recommended for installing
executorch from source. Double check whether the user wants to use a virtual environment before
building from source.

Expand Down
11 changes: 8 additions & 3 deletions examples/models/llama/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,11 @@ DEFINE_string(
"etdump.in",
"If an etdump path is provided, generate an ETDump file at the specified path for profiling purposes.");

// Command-line flag selecting which exported method of the model to run.
// Lets a single binary drive models that export multiple entry points
// (e.g. a base "forward" and a "lora_forward") without rebuilding.
DEFINE_string(
method_name,
"forward",
"Method name to execute in the model (e.g., 'forward', 'lora_forward').");

// Helper function to parse comma-separated string lists
std::vector<std::string> parseStringList(const std::string& input) {
std::vector<std::string> result;
Expand Down Expand Up @@ -145,11 +150,11 @@ int32_t main(int32_t argc, char** argv) {
data_paths,
temperature,
#ifdef ET_EVENT_TRACER_ENABLED
std::move(etdump_gen_ptr)
std::move(etdump_gen_ptr),
#else
nullptr
nullptr,
#endif
);
FLAGS_method_name);

if (runner == nullptr) {
ET_LOG(Error, "Failed to create llama runner");
Expand Down
15 changes: 10 additions & 5 deletions examples/models/llama/runner/runner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,8 @@ std::unique_ptr<llm::TextLLMRunner> create_llama_runner(
const std::string& tokenizer_path,
std::optional<const std::string> data_path,
float temperature,
std::unique_ptr<::executorch::runtime::EventTracer> event_tracer) {
std::unique_ptr<::executorch::runtime::EventTracer> event_tracer,
const std::string& method_name) {
if (data_path.has_value()) {
std::vector<std::string> data_files;
data_files.push_back(data_path.value());
Expand All @@ -46,22 +47,25 @@ std::unique_ptr<llm::TextLLMRunner> create_llama_runner(
tokenizer_path,
std::move(data_files),
temperature,
std::move(event_tracer));
std::move(event_tracer),
method_name);
}
return create_llama_runner(
model_path,
tokenizer_path,
std::vector<std::string>(),
temperature,
std::move(event_tracer));
std::move(event_tracer),
method_name);
}

std::unique_ptr<llm::TextLLMRunner> create_llama_runner(
const std::string& model_path,
const std::string& tokenizer_path,
std::vector<std::string> data_files,
float temperature,
std::unique_ptr<::executorch::runtime::EventTracer> event_tracer) {
std::unique_ptr<::executorch::runtime::EventTracer> event_tracer,
const std::string& method_name) {
ET_LOG(
Info,
"Creating LLaMa runner: model_path=%s, tokenizer_path=%s",
Expand All @@ -84,7 +88,8 @@ std::unique_ptr<llm::TextLLMRunner> create_llama_runner(
std::move(tokenizer),
data_files,
temperature,
std::move(event_tracer));
std::move(event_tracer),
method_name);
}

} // namespace example
6 changes: 4 additions & 2 deletions examples/models/llama/runner/runner.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,14 +29,16 @@ std::unique_ptr<llm::TextLLMRunner> create_llama_runner(
const std::string& tokenizer_path,
std::optional<const std::string> data_path,
float temperature = -1.0f,
std::unique_ptr<::executorch::runtime::EventTracer> event_tracer = nullptr);
std::unique_ptr<::executorch::runtime::EventTracer> event_tracer = nullptr,
const std::string& method_name = "forward");

std::unique_ptr<llm::TextLLMRunner> create_llama_runner(
const std::string& model_path,
const std::string& tokenizer_path,
std::vector<std::string> data_files = {},
float temperature = -1.0f,
std::unique_ptr<::executorch::runtime::EventTracer> event_tracer = nullptr);
std::unique_ptr<::executorch::runtime::EventTracer> event_tracer = nullptr,
const std::string& method_name = "forward");

std::unique_ptr<tokenizers::Tokenizer> load_llama_tokenizer(
const std::string& tokenizer_path,
Expand Down
25 changes: 17 additions & 8 deletions extension/llm/runner/llm_runner_helper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -182,26 +182,35 @@ std::unique_ptr<TextLLMRunner> create_text_llm_runner(
const std::string& model_path,
std::unique_ptr<::tokenizers::Tokenizer> tokenizer,
std::optional<const std::string> data_path,
float temperature) {
float temperature,
const std::string& method_name) {
if (data_path.has_value()) {
std::vector<std::string> data_files;
data_files.push_back(data_path.value());
return create_text_llm_runner(
model_path, std::move(tokenizer), std::move(data_files), temperature);
model_path,
std::move(tokenizer),
std::move(data_files),
temperature,
nullptr,
method_name);
}
return create_text_llm_runner(
model_path,
std::move(tokenizer),
std::vector<std::string>(),
temperature);
temperature,
nullptr,
method_name);
}

std::unique_ptr<TextLLMRunner> create_text_llm_runner(
const std::string& model_path,
std::unique_ptr<::tokenizers::Tokenizer> tokenizer,
std::vector<std::string> data_files,
float temperature,
std::unique_ptr<::executorch::runtime::EventTracer> event_tracer) {
std::unique_ptr<::executorch::runtime::EventTracer> event_tracer,
const std::string& method_name) {
// Sanity check tokenizer
if (!tokenizer || !tokenizer->is_loaded()) {
ET_LOG(Error, "Tokenizer is null or not loaded");
Expand Down Expand Up @@ -236,10 +245,10 @@ std::unique_ptr<TextLLMRunner> create_text_llm_runner(
// Create IOManager
std::unique_ptr<IOManager> io_manager = std::make_unique<IOManager>(*module);

// Create text_decoder_runner. Use a shared_ptr so that it can be shared with
// TextPrefiller and TextTokenGenerator
auto text_decoder_runner =
std::make_unique<TextDecoderRunner>(module.get(), io_manager.get());
// Create text_decoder_runner
ET_LOG(Info, "Using method: %s", method_name.c_str());
auto text_decoder_runner = std::make_unique<TextDecoderRunner>(
module.get(), io_manager.get(), method_name);

// Create text_prefiller
auto text_prefiller = std::make_unique<TextPrefiller>(
Expand Down
9 changes: 7 additions & 2 deletions extension/llm/runner/llm_runner_helper.h
Original file line number Diff line number Diff line change
Expand Up @@ -95,14 +95,16 @@ ET_EXPERIMENTAL std::unordered_set<uint64_t> get_eos_ids(
* @param data_path Optional path to additional data required by the model
* @param temperature Optional temperature parameter for controlling randomness
* (deprecated)
* @param method_name Name of the method to execute in the model
* @return std::unique_ptr<TextLLMRunner> Initialized TextLLMRunner instance, or
* nullptr on failure
*/
ET_EXPERIMENTAL std::unique_ptr<TextLLMRunner> create_text_llm_runner(
const std::string& model_path,
std::unique_ptr<::tokenizers::Tokenizer> tokenizer,
std::optional<const std::string> data_path,
float temperature = -1.0f);
float temperature = -1.0f,
const std::string& method_name = "forward");

/**
* @brief Creates a TextLLMRunner instance with dependency injection
Expand All @@ -116,6 +118,8 @@ ET_EXPERIMENTAL std::unique_ptr<TextLLMRunner> create_text_llm_runner(
* @param data_files Vector of paths to additional data required by the model
* @param temperature Optional temperature parameter for controlling randomness
* (deprecated)
* @param event_tracer Optional event tracer for profiling
* @param method_name Name of the method to execute in the model
* @return std::unique_ptr<TextLLMRunner> Initialized TextLLMRunner instance, or
* nullptr on failure
*/
Expand All @@ -124,7 +128,8 @@ ET_EXPERIMENTAL std::unique_ptr<TextLLMRunner> create_text_llm_runner(
std::unique_ptr<::tokenizers::Tokenizer> tokenizer,
std::vector<std::string> data_files = {},
float temperature = -1.0f,
std::unique_ptr<::executorch::runtime::EventTracer> event_tracer = nullptr);
std::unique_ptr<::executorch::runtime::EventTracer> event_tracer = nullptr,
const std::string& method_name = "forward");

/**
* @brief Creates a MultimodalRunner instance with dependency injection
Expand Down
35 changes: 35 additions & 0 deletions extension/llm/runner/test/test_text_decoder_runner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,41 @@ class TextDecoderRunnerTest : public Test {
std::unique_ptr<IOManager> io_manager_;
};

// Test that method_name defaults to "forward".
// runner_ is built by the fixture without an explicit method name, so this
// pins the backward-compatible default of the new constructor parameter.
TEST_F(TextDecoderRunnerTest, MethodNameDefaultsToForward) {
EXPECT_EQ(runner_->method_name(), "forward");
}

// Test that method_name can be set to a custom value via the constructor
// and is reported back unchanged by the method_name() accessor.
TEST_F(TextDecoderRunnerTest, MethodNameCustomValue) {
auto custom_runner = std::make_unique<TextDecoderRunner>(
mock_module_.get(), io_manager_.get(), "encode");
EXPECT_EQ(custom_runner->method_name(), "encode");
}

// Test that load() uses method_name (not hardcoded "forward").
// Loads a real PTE model, then constructs a runner pointing at a method that
// does not exist in it; load() must fail, proving the configured name is the
// one actually looked up.
TEST_F(TextDecoderRunnerTest, LoadUsesMethodName) {
// Get an available model
// NOTE(review): assumes KVCACHE_CACHE_POS holds a path to a test .pte file
// provided by the test environment — confirm against the CI setup.
const char* model_path = std::getenv("KVCACHE_CACHE_POS");
if (!model_path) {
GTEST_SKIP() << "No PTE model environment variable set";
}
auto module = std::make_unique<Module>(model_path);
auto load_result = module->load();
if (load_result != Error::Ok) {
GTEST_SKIP() << "Failed to load model";
}

auto io_mgr = std::make_unique<IOManager>(*module);

// Create runner with a method name that doesn't exist
TextDecoderRunner runner(module.get(), io_mgr.get(), "nonexistent_method");

// load() should fail because "nonexistent_method" doesn't exist
auto result = runner.load();
EXPECT_NE(result, Error::Ok);
}

// Test logits_to_token() method with Float tensor
TEST_F(TextDecoderRunnerTest, LogitsToTokenFloat) {
TensorFactory<executorch::aten::ScalarType::Float> tf_float;
Expand Down
27 changes: 17 additions & 10 deletions extension/llm/runner/text_decoder_runner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,13 @@ namespace llm {
// NOTE: we observed ~2x loading performance increase on iPhone 15
// and a ~5% improvement on Galaxy S22 by switching to
// FileDataLoader instead of MmapDataLoader + UseMlockIgnoreErrors.
// Constructor.
// @param module Non-owning pointer to the loaded Module to execute.
// @param io_manager Non-owning pointer to the IOManager handling I/O tensors.
// @param method_name Name of the exported method to run; defaults to
//        "forward" in the header, so existing two-argument call sites keep
//        working unchanged. Taken by value and moved into the member.
// The stale pre-change two-argument definition has been removed: keeping it
// alongside this overload duplicates the constructor logic and no longer
// matches the single declaration in the header.
TextDecoderRunner::TextDecoderRunner(
Module* module,
IOManager* io_manager,
std::string method_name)
: module_(module),
io_manager_(io_manager),
method_name_(std::move(method_name)) {}

// This function is functional, meaning it shouldn't modify any state of the
// input. It should be safe to call multiple times with the same inputs. The
Expand All @@ -32,7 +37,7 @@ ::executorch::runtime::Result<executorch::aten::Tensor> TextDecoderRunner::step(
TensorPtr& tokens,
int64_t start_pos) {
// ET_LOG(Info, "Input token %" PRIu64, input_token);
auto method_meta_result = module_->method_meta("forward");
auto method_meta_result = module_->method_meta(method_name_);
if (!method_meta_result.ok()) {
return method_meta_result.error();
}
Expand All @@ -44,25 +49,26 @@ ::executorch::runtime::Result<executorch::aten::Tensor> TextDecoderRunner::step(

if (use_kv_cache) {
auto start_pos_tensor_result = populate_start_pos_or_cache_position(
module_, start_pos, cache_positions, tokens->numel(), "forward");
module_, start_pos, cache_positions, tokens->numel(), method_name_.c_str());
if (!start_pos_tensor_result.ok()) {
return start_pos_tensor_result.error();
}
auto start_pos_tensor = std::move(*start_pos_tensor_result);

std::vector<runtime::EValue> inputs;
auto inputs_res = io_manager_->prepare_decode(tokens, start_pos_tensor);
auto inputs_res =
io_manager_->prepare_decode(tokens, start_pos_tensor, method_name_);
ET_CHECK_OK_OR_RETURN_ERROR(inputs_res.error());
inputs = inputs_res.get();
auto outputs_res = module_->forward(inputs);
auto outputs_res = module_->execute(method_name_, inputs);
ET_CHECK_OK_OR_RETURN_ERROR(outputs_res.error());

auto update_err = io_manager_->update_decode(outputs_res.get());
auto update_err = io_manager_->update_decode(outputs_res.get(), method_name_);
ET_CHECK_OK_OR_RETURN_ERROR(update_err);

ET_CHECK_MSG(
outputs_res.get().size() == 1,
"More then one output returned from executing LLM.");
"More than one output returned from executing LLM.");
ET_CHECK_MSG(
outputs_res.get()[0].isTensor(),
"Non Tensor Output returned from executing LLM");
Expand All @@ -72,11 +78,12 @@ ::executorch::runtime::Result<executorch::aten::Tensor> TextDecoderRunner::step(
} else { // no kv cache
(void)start_pos; // unused

auto outputs_res = module_->forward(tokens);
std::vector<runtime::EValue> inputs{tokens};
auto outputs_res = module_->execute(method_name_, inputs);
ET_CHECK_OK_OR_RETURN_ERROR(outputs_res.error());
ET_CHECK_MSG(
outputs_res.get().size() == 1,
"More then one output returned from executing LLM.");
"More than one output returned from executing LLM.");
ET_CHECK_MSG(
outputs_res.get()[0].isTensor(),
"Non Tensor Output returned from executing LLM");
Expand Down
25 changes: 22 additions & 3 deletions extension/llm/runner/text_decoder_runner.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,10 @@ namespace llm {

class ET_EXPERIMENTAL TextDecoderRunner {
public:
explicit TextDecoderRunner(Module* module, IOManager* io_manager);
explicit TextDecoderRunner(
Module* module,
IOManager* io_manager,
std::string method_name = "forward");

virtual ~TextDecoderRunner() = default;

Expand All @@ -40,15 +43,30 @@ class ET_EXPERIMENTAL TextDecoderRunner {
* @return The error code.
*/
// Loads the configured method on the underlying Module.
// Logs an error naming the missing method on failure so users of custom
// method names (e.g. "lora_forward") get an actionable message.
// @return Error::Ok on success, otherwise the Module's load error.
// The stale `return module_->load_method("forward");` line left over from
// the pre-change version has been removed: it made every statement after it
// unreachable and ignored method_name_ entirely.
virtual ::executorch::runtime::Error load() {
  auto err = module_->load_method(method_name_);
  if (err != ::executorch::runtime::Error::Ok) {
    ET_LOG(
        Error,
        "Failed to load method '%s'. Check available methods in the model.",
        method_name_.c_str());
  }
  return err;
}

/**
 * Check if the configured method in the Module is loaded.
 * @return True if the method named by method_name_ is loaded, false
 *         otherwise.
 */
// The stale `return module_->is_method_loaded("forward");` line from the
// pre-change version has been removed: it made the method_name_-aware
// return below unreachable and ignored custom method names.
virtual bool is_method_loaded() {
  return module_->is_method_loaded(method_name_);
}

/**
 * Get the method name used by this runner.
 * @return Const reference to the stored method name (e.g. "forward");
 *         valid for the lifetime of this runner.
 */
const std::string& method_name() const {
return method_name_;
}

inline void stop() {
Expand Down Expand Up @@ -79,6 +97,7 @@ class ET_EXPERIMENTAL TextDecoderRunner {
*/
Module* module_;
IOManager* io_manager_;
std::string method_name_;
bool should_stop_{false};
};

Expand Down
Loading