1 change: 1 addition & 0 deletions .lintrunner.toml
@@ -76,6 +76,7 @@ exclude_patterns = [
 'examples/demo-apps/apple_ios/**',
 'examples/demo-apps/react-native/rnllama/ios/**',
 'extension/apple/**',
+'extension/llm/apple/**',
 # File contains @generated
 'extension/llm/custom_ops/spinquant/fast_hadamard_transform_special.h',
 'extension/llm/custom_ops/spinquant/test/fast_hadamard_transform_special_unstrided_cpu.h',
LLaMARunner.h
@@ -10,18 +10,16 @@

 NS_ASSUME_NONNULL_BEGIN
 
-FOUNDATION_EXPORT NSErrorDomain const LLaMARunnerErrorDomain;
-
 @interface LLaMARunner : NSObject
 
-- (instancetype)initWithModelPath:(NSString*)filePath
-                    tokenizerPath:(NSString*)tokenizerPath;
+- (instancetype)initWithModelPath:(NSString *)modelPath
+                    tokenizerPath:(NSString *)tokenizerPath;
 - (BOOL)isLoaded;
-- (BOOL)loadWithError:(NSError**)error;
-- (BOOL)generate:(NSString*)prompt
-  sequenceLength:(NSInteger)seq_len
-withTokenCallback:(nullable void (^)(NSString*))callback
-           error:(NSError**)error;
+- (BOOL)loadWithError:(NSError **)error;
+- (BOOL)generate:(NSString *)prompt
+  sequenceLength:(NSInteger)seq_len
+withTokenCallback:(nullable void (^)(NSString *))callback
+           error:(NSError **)error;
 - (void)stop;
 
 + (instancetype)new NS_UNAVAILABLE;
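The app-level API keeps the same shape after this refactor; the pointer spacing is tidied, and the LLaMARunnerErrorDomain constant disappears because errors now surface from the new wrapper's own error domain. A minimal usage sketch, with placeholder paths and most error handling elided:

    LLaMARunner *runner = [[LLaMARunner alloc] initWithModelPath:@"/path/to/model.pte"
                                                   tokenizerPath:@"/path/to/tokenizer.model"];
    NSError *error;
    if ([runner loadWithError:&error]) {
      [runner generate:@"Hello"
         sequenceLength:128
      withTokenCallback:^(NSString *token) { NSLog(@"%@", token); }
                  error:&error];
    }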
LLaMARunner.mm
@@ -9,33 +9,29 @@
 #import "LLaMARunner.h"
 
 #import <ExecuTorch/ExecuTorchLog.h>
-#import <executorch/extension/llm/runner/text_llm_runner.h>
+#import <ExecuTorchLLM/ExecuTorchLLM.h>
 #import <executorch/examples/models/llama/tokenizer/llama_tiktoken.h>
 
-using namespace executorch::extension;
-using namespace executorch::runtime;
-
-NSErrorDomain const LLaMARunnerErrorDomain = @"LLaMARunnerErrorDomain";
-
 @interface LLaMARunner ()<ExecuTorchLogSink>
 @end
 
 @implementation LLaMARunner {
-  std::unique_ptr<llm::TextLLMRunner> _runner;
+  ExecuTorchTextLLMRunner *_runner;
 }
 
-- (instancetype)initWithModelPath:(NSString*)modelPath
-                    tokenizerPath:(NSString*)tokenizerPath {
+- (instancetype)initWithModelPath:(NSString *)modelPath
+                    tokenizerPath:(NSString *)tokenizerPath {
   self = [super init];
   if (self) {
     [ExecuTorchLog.sharedLog addSink:self];
-    _runner = llm::create_text_llm_runner(
-        modelPath.UTF8String,
-        llm::load_tokenizer(
-            tokenizerPath.UTF8String,
-            example::get_special_tokens(example::Version::Default)
-        )
-    );
+    auto tokens = example::get_special_tokens(example::Version::Default);
+    NSMutableArray<NSString*> *specialTokens = [[NSMutableArray alloc] initWithCapacity:tokens->size()];
+    for (const auto &token : *tokens) {
+      [specialTokens addObject:(NSString *)@(token.c_str())];
+    }
+    _runner = [[ExecuTorchTextLLMRunner alloc] initWithModelPath:modelPath
+                                                   tokenizerPath:tokenizerPath
+                                                   specialTokens:specialTokens];
   }
   return self;
 }
@@ -45,45 +41,25 @@ - (void)dealloc {
 }
 
 - (BOOL)isLoaded {
-  return _runner->is_loaded();
+  return [_runner isLoaded];
 }
 
 - (BOOL)loadWithError:(NSError**)error {
-  const auto status = _runner->load();
-  if (status != Error::Ok) {
-    if (error) {
-      *error = [NSError errorWithDomain:LLaMARunnerErrorDomain
-                                   code:(NSInteger)status
-                               userInfo:nil];
-    }
-    return NO;
-  }
-  return YES;
+  return [_runner loadWithError:error];
 }
 
-- (BOOL)generate:(NSString*)prompt
-  sequenceLength:(NSInteger)seq_len
-withTokenCallback:(nullable void (^)(NSString*))callback
-           error:(NSError**)error {
-  const auto status = _runner->generate(
-      prompt.UTF8String,
-      llm::GenerationConfig{.seq_len = static_cast<int32_t>(seq_len)},
-      [callback](const std::string& token) {
-        callback(@(token.c_str()));
-      });
-  if (status != Error::Ok) {
-    if (error) {
-      *error = [NSError errorWithDomain:LLaMARunnerErrorDomain
-                                   code:(NSInteger)status
-                               userInfo:nil];
-    }
-    return NO;
-  }
-  return YES;
+- (BOOL)generate:(NSString *)prompt
+  sequenceLength:(NSInteger)seq_len
+withTokenCallback:(nullable void (^)(NSString *))callback
+           error:(NSError **)error {
+  return [_runner generate:prompt
+            sequenceLength:seq_len
+         withTokenCallback:callback
+                     error:error];
 }
 
 - (void)stop {
-  _runner->stop();
+  [_runner stop];
 }
 
 #pragma mark - ExecuTorchLogSink
9 changes: 9 additions & 0 deletions extension/llm/apple/ExecuTorchLLM/Exported/ExecuTorchLLM.h
@@ -0,0 +1,9 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#import "ExecuTorchTextLLMRunner.h"
ExecuTorchTextLLMRunner.h
@@ -0,0 +1,74 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#import <Foundation/Foundation.h>
+
+NS_ASSUME_NONNULL_BEGIN
+
+FOUNDATION_EXPORT NSErrorDomain const ExecuTorchTextLLMRunnerErrorDomain;
+
+/**
+ A wrapper class for the C++ llm::TextLLMRunner that provides
+ Objective-C APIs to load models, manage tokenization with custom
+ special tokens, generate text sequences, and stop the runner.
+ */
+NS_SWIFT_NAME(TextLLMRunner)
+__attribute__((deprecated("This API is experimental.")))
+@interface ExecuTorchTextLLMRunner : NSObject
+
+/**
+ Initializes a text LLM runner with the given model and tokenizer paths,
+ and a list of special tokens to include in the tokenizer.
+
+ @param modelPath File system path to the serialized model.
+ @param tokenizerPath File system path to the tokenizer data.
+ @param tokens An array of NSString special tokens to use during tokenization.
+ @return An initialized ExecuTorchTextLLMRunner instance.
+ */
+- (instancetype)initWithModelPath:(NSString *)modelPath
+                    tokenizerPath:(NSString *)tokenizerPath
+                    specialTokens:(NSArray<NSString *> *)tokens;
+
+/**
+ Checks whether the underlying model has been successfully loaded.
+
+ @return YES if the model is loaded, NO otherwise.
+ */
+- (BOOL)isLoaded;
+
+/**
+ Loads the model into memory, returning an error if loading fails.
+
+ @param error On failure, populated with an NSError explaining the issue.
+ @return YES if loading succeeds, NO if an error occurred.
+ */
+- (BOOL)loadWithError:(NSError **)error;
+
+/**
+ Generates text given an input prompt, up to a specified sequence length.
+ Invokes the provided callback for each generated token.
+
+ @param prompt The initial text prompt to generate from.
+ @param seq_len The maximum number of tokens to generate.
+ @param callback A block called with each generated token as an NSString.
+ @param error On failure, populated with an NSError explaining the issue.
+ @return YES if generation completes successfully, NO if an error occurred.
+ */
+- (BOOL)generate:(NSString *)prompt
+  sequenceLength:(NSInteger)seq_len
+withTokenCallback:(nullable void (^)(NSString *))callback
+           error:(NSError **)error;
+
+/**
+ Stops any ongoing generation and cleans up internal resources.
+ */
+- (void)stop;
+
+@end
+
+NS_ASSUME_NONNULL_END
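A minimal Objective-C sketch of driving the new runner end to end; the paths are placeholders and the special-token list shows Llama 3-style markers purely as an illustration:

    ExecuTorchTextLLMRunner *runner =
        [[ExecuTorchTextLLMRunner alloc] initWithModelPath:@"/path/to/model.pte"
                                             tokenizerPath:@"/path/to/tokenizer.model"
                                             specialTokens:@[ @"<|begin_of_text|>", @"<|end_of_text|>" ]];
    NSError *error;
    if (![runner loadWithError:&error]) {
      NSLog(@"load failed: %@", error);
    } else {
      [runner generate:@"Once upon a time"
         sequenceLength:128
      withTokenCallback:^(NSString *token) { NSLog(@"%@", token); }
                  error:&error];
    }

From Swift the class is exposed as TextLLMRunner via NS_SWIFT_NAME, and the deprecated attribute means every use emits an "experimental" warning until the API stabilizes.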
ExecuTorchTextLLMRunner.mm
@@ -0,0 +1,102 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#import "ExecuTorchTextLLMRunner.h"
+
+#import <executorch/extension/llm/runner/text_llm_runner.h>
+
+using namespace executorch::extension;
+using namespace executorch::runtime;
+
+NSErrorDomain const ExecuTorchTextLLMRunnerErrorDomain = @"ExecuTorchTextLLMRunnerErrorDomain";
+
+@implementation ExecuTorchTextLLMRunner {
+  NSString *_modelPath;
+  NSString *_tokenizerPath;
+  std::unique_ptr<std::vector<std::string>> _specialTokens;
+  std::unique_ptr<llm::TextLLMRunner> _runner;
+}
+
+- (instancetype)initWithModelPath:(NSString*)modelPath
+                    tokenizerPath:(NSString*)tokenizerPath
+                    specialTokens:(NSArray<NSString*>*)tokens {
+  self = [super init];
+  if (self) {
+    _modelPath = [modelPath copy];
+    _tokenizerPath = [tokenizerPath copy];
+    _specialTokens = std::make_unique<std::vector<std::string>>();
+    for (NSString *token in tokens) {
+      _specialTokens->emplace_back(token.UTF8String);
+    }
+  }
+  return self;
+}
+
+- (BOOL)isLoaded {
+  return _runner && _runner->is_loaded();
+}
+
+- (BOOL)loadWithError:(NSError**)error {
+  if (![self isLoaded]) {
+    _runner = llm::create_text_llm_runner(
+        _modelPath.UTF8String,
+        llm::load_tokenizer(_tokenizerPath.UTF8String, std::move(_specialTokens))
+    );
+    if (!_runner) {
+      if (error) {
+        *error = [NSError errorWithDomain:ExecuTorchTextLLMRunnerErrorDomain
+                                      code:-1
+                                  userInfo:@{NSLocalizedDescriptionKey: @"Failed to create runner"}];
+      }
+      return NO;
+    }
+  }
+  auto status = _runner->load();
+  if (status != Error::Ok) {
+    if (error) {
+      *error = [NSError errorWithDomain:ExecuTorchTextLLMRunnerErrorDomain
+                                   code:(NSInteger)status
+                               userInfo:nil];
+    }
+    return NO;
+  }
+  return YES;
+}
+
+- (BOOL)generate:(NSString*)prompt
+  sequenceLength:(NSInteger)seq_len
+withTokenCallback:(nullable void (^)(NSString*))callback
+           error:(NSError**)error {
+  if (![self loadWithError:error]) {
+    return NO;
+  }
+  auto status = _runner->generate(
+      prompt.UTF8String,
+      llm::GenerationConfig{.seq_len = static_cast<int32_t>(seq_len)},
+      [callback](const std::string& token) {
+        if (callback) callback(@(token.c_str()));
+      }
+  );
+  if (status != Error::Ok) {
+    if (error) {
+      *error = [NSError errorWithDomain:ExecuTorchTextLLMRunnerErrorDomain
+                                   code:(NSInteger)status
+                               userInfo:nil];
+    }
+    return NO;
+  }
+  return YES;
+}
+
+- (void)stop {
+  if (_runner) {
+    _runner->stop();
+  }
+}
+
+@end
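Note that -generate:sequenceLength:withTokenCallback:error: drives the C++ generation loop synchronously, returning only once generation completes or fails; -stop exists to interrupt it from another thread. A sketch of the resulting calling pattern, with an illustrative queue label:

    dispatch_queue_t queue = dispatch_queue_create("llm.generation", DISPATCH_QUEUE_SERIAL);
    dispatch_async(queue, ^{
      NSError *error;
      [runner generate:prompt
         sequenceLength:256
      withTokenCallback:^(NSString *token) { /* stream the token to UI state */ }
                  error:&error];
    });
    // Later, e.g. from a Stop button:
    [runner stop];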
2 changes: 2 additions & 0 deletions extension/llm/runner/text_llm_runner.cpp
@@ -12,6 +12,7 @@

 #include <executorch/extension/llm/runner/text_llm_runner.h>
 #include <executorch/extension/llm/runner/util.h>
+#include <executorch/runtime/platform/runtime.h>
 #include <pytorch/tokenizers/hf_tokenizer.h>
 #include <pytorch/tokenizers/llama2c_tokenizer.h>
 #include <pytorch/tokenizers/sentencepiece.h>
@@ -256,6 +257,7 @@ std::unique_ptr<tokenizers::Tokenizer> load_tokenizer(
     std::optional<std::string> pattern,
     size_t bos_token_index,
     size_t eos_token_index) {
+  runtime::runtime_init();
   auto json_tokenizer = std::make_unique<tokenizers::HFTokenizer>();
   if (json_tokenizer->load(tokenizer_path) == ::tokenizers::Error::Ok) {
     ET_LOG(Info, "Loaded json tokenizer");
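The runtime::runtime_init() call is the substantive change in this hunk: load_tokenizer() now bootstraps the ExecuTorch runtime itself, so code that builds a tokenizer before touching any module, as the new Objective-C wrapper does, no longer depends on the host calling the initializer first. A minimal C++ sketch under that assumption, with placeholder paths and the remaining load_tokenizer parameters left at their defaults:

    #include <executorch/extension/llm/runner/text_llm_runner.h>

    using namespace executorch::extension;

    // load_tokenizer() now performs runtime initialization internally,
    // so no explicit runtime_init() call is needed before this point.
    auto tokenizer = llm::load_tokenizer("/path/to/tokenizer.model", nullptr);
    auto runner = llm::create_text_llm_runner("/path/to/model.pte", std::move(tokenizer));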