diff --git a/.lintrunner.toml b/.lintrunner.toml index 4c881940155..38bbfe7496f 100644 --- a/.lintrunner.toml +++ b/.lintrunner.toml @@ -76,6 +76,7 @@ exclude_patterns = [ 'examples/demo-apps/apple_ios/**', 'examples/demo-apps/react-native/rnllama/ios/**', 'extension/apple/**', + 'extension/llm/apple/**', # File contains @generated 'extension/llm/custom_ops/spinquant/fast_hadamard_transform_special.h', 'extension/llm/custom_ops/spinquant/test/fast_hadamard_transform_special_unstrided_cpu.h', diff --git a/examples/demo-apps/apple_ios/LLaMA/LLaMARunner/LLaMARunner/Exported/LLaMARunner.h b/examples/demo-apps/apple_ios/LLaMA/LLaMARunner/LLaMARunner/Exported/LLaMARunner.h index 52056975847..1b5ada5138e 100644 --- a/examples/demo-apps/apple_ios/LLaMA/LLaMARunner/LLaMARunner/Exported/LLaMARunner.h +++ b/examples/demo-apps/apple_ios/LLaMA/LLaMARunner/LLaMARunner/Exported/LLaMARunner.h @@ -10,18 +10,16 @@ NS_ASSUME_NONNULL_BEGIN -FOUNDATION_EXPORT NSErrorDomain const LLaMARunnerErrorDomain; - @interface LLaMARunner : NSObject -- (instancetype)initWithModelPath:(NSString*)filePath - tokenizerPath:(NSString*)tokenizerPath; +- (instancetype)initWithModelPath:(NSString *)modelPath + tokenizerPath:(NSString *)tokenizerPath; - (BOOL)isLoaded; -- (BOOL)loadWithError:(NSError**)error; -- (BOOL)generate:(NSString*)prompt - sequenceLength:(NSInteger)seq_len - withTokenCallback:(nullable void (^)(NSString*))callback - error:(NSError**)error; +- (BOOL)loadWithError:(NSError **)error; +- (BOOL)generate:(NSString *)prompt + sequenceLength:(NSInteger)seq_len + withTokenCallback:(nullable void (^)(NSString *))callback + error:(NSError **)error; - (void)stop; + (instancetype)new NS_UNAVAILABLE; diff --git a/examples/demo-apps/apple_ios/LLaMA/LLaMARunner/LLaMARunner/Exported/LLaMARunner.mm b/examples/demo-apps/apple_ios/LLaMA/LLaMARunner/LLaMARunner/Exported/LLaMARunner.mm index b91cc79befb..991d2f94868 100644 --- a/examples/demo-apps/apple_ios/LLaMA/LLaMARunner/LLaMARunner/Exported/LLaMARunner.mm +++ b/examples/demo-apps/apple_ios/LLaMA/LLaMARunner/LLaMARunner/Exported/LLaMARunner.mm @@ -9,33 +9,29 @@ #import "LLaMARunner.h" #import -#import +#import #import -using namespace executorch::extension; -using namespace executorch::runtime; - -NSErrorDomain const LLaMARunnerErrorDomain = @"LLaMARunnerErrorDomain"; - @interface LLaMARunner () @end @implementation LLaMARunner { - std::unique_ptr _runner; + ExecuTorchTextLLMRunner *_runner; } -- (instancetype)initWithModelPath:(NSString*)modelPath - tokenizerPath:(NSString*)tokenizerPath { +- (instancetype)initWithModelPath:(NSString *)modelPath + tokenizerPath:(NSString *)tokenizerPath { self = [super init]; if (self) { [ExecuTorchLog.sharedLog addSink:self]; - _runner = llm::create_text_llm_runner( - modelPath.UTF8String, - llm::load_tokenizer( - tokenizerPath.UTF8String, - example::get_special_tokens(example::Version::Default) - ) - ); + auto tokens = example::get_special_tokens(example::Version::Default); + NSMutableArray *specialTokens = [[NSMutableArray alloc] initWithCapacity:tokens->size()]; + for (const auto &token : *tokens) { + [specialTokens addObject:(NSString *)@(token.c_str())]; + } + _runner = [[ExecuTorchTextLLMRunner alloc] initWithModelPath:modelPath + tokenizerPath:tokenizerPath + specialTokens:specialTokens]; } return self; } @@ -45,45 +41,25 @@ - (void)dealloc { } - (BOOL)isLoaded { - return _runner->is_loaded(); + return [_runner isLoaded]; } - (BOOL)loadWithError:(NSError**)error { - const auto status = _runner->load(); - if (status != Error::Ok) { - if (error) { - *error = [NSError errorWithDomain:LLaMARunnerErrorDomain - code:(NSInteger)status - userInfo:nil]; - } - return NO; - } - return YES; + return [_runner loadWithError:error]; } -- (BOOL)generate:(NSString*)prompt - sequenceLength:(NSInteger)seq_len - withTokenCallback:(nullable void (^)(NSString*))callback - error:(NSError**)error { - const auto status = _runner->generate( - prompt.UTF8String, - llm::GenerationConfig{.seq_len = static_cast(seq_len)}, - [callback](const std::string& token) { - callback(@(token.c_str())); - }); - if (status != Error::Ok) { - if (error) { - *error = [NSError errorWithDomain:LLaMARunnerErrorDomain - code:(NSInteger)status - userInfo:nil]; - } - return NO; - } - return YES; +- (BOOL)generate:(NSString *)prompt + sequenceLength:(NSInteger)seq_len + withTokenCallback:(nullable void (^)(NSString *))callback + error:(NSError **)error { + return [_runner generate:prompt + sequenceLength:seq_len + withTokenCallback:callback + error:error]; } - (void)stop { - _runner->stop(); + [_runner stop]; } #pragma mark - ExecuTorchLogSink diff --git a/extension/llm/apple/ExecuTorchLLM/Exported/ExecuTorchLLM.h b/extension/llm/apple/ExecuTorchLLM/Exported/ExecuTorchLLM.h new file mode 100644 index 00000000000..42e08d18030 --- /dev/null +++ b/extension/llm/apple/ExecuTorchLLM/Exported/ExecuTorchLLM.h @@ -0,0 +1,9 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + */ + +#import "ExecuTorchTextLLMRunner.h" diff --git a/extension/llm/apple/ExecuTorchLLM/Exported/ExecuTorchTextLLMRunner.h b/extension/llm/apple/ExecuTorchLLM/Exported/ExecuTorchTextLLMRunner.h new file mode 100644 index 00000000000..74518605e90 --- /dev/null +++ b/extension/llm/apple/ExecuTorchLLM/Exported/ExecuTorchTextLLMRunner.h @@ -0,0 +1,74 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + */ + +#import + +NS_ASSUME_NONNULL_BEGIN + +FOUNDATION_EXPORT NSErrorDomain const ExecuTorchTextLLMRunnerErrorDomain; + +/** + A wrapper class for the C++ llm::TextLLMRunner that provides + Objective-C APIs to load models, manage tokenization with custom + special tokens, generate text sequences, and stop the runner. +*/ +NS_SWIFT_NAME(TextLLMRunner) +__attribute__((deprecated("This API is experimental."))) +@interface ExecuTorchTextLLMRunner : NSObject + +/** + Initializes a text LLM runner with the given model and tokenizer paths, + and a list of special tokens to include in the tokenizer. + + @param modelPath File system path to the serialized model. + @param tokenizerPath File system path to the tokenizer data. + @param tokens An array of NSString special tokens to use during tokenization. + @return An initialized ExecuTorchTextLLMRunner instance. +*/ +- (instancetype)initWithModelPath:(NSString *)modelPath + tokenizerPath:(NSString *)tokenizerPath + specialTokens:(NSArray *)tokens; + +/** + Checks whether the underlying model has been successfully loaded. + + @return YES if the model is loaded, NO otherwise. +*/ +- (BOOL)isLoaded; + +/** + Loads the model into memory, returning an error if loading fails. + + @param error On failure, populated with an NSError explaining the issue. + @return YES if loading succeeds, NO if an error occurred. +*/ +- (BOOL)loadWithError:(NSError **)error; + +/** + Generates text given an input prompt, up to a specified sequence length. + Invokes the provided callback for each generated token. + + @param prompt The initial text prompt to generate from. + @param seq_len The maximum number of tokens to generate. + @param callback A block called with each generated token as an NSString. + @param error On failure, populated with an NSError explaining the issue. + @return YES if generation completes successfully, NO if an error occurred. +*/ +- (BOOL)generate:(NSString *)prompt + sequenceLength:(NSInteger)seq_len +withTokenCallback:(nullable void (^)(NSString *))callback + error:(NSError **)error; + +/** + Stops any ongoing generation and cleans up internal resources. +*/ +- (void)stop; + +@end + +NS_ASSUME_NONNULL_END diff --git a/extension/llm/apple/ExecuTorchLLM/Exported/ExecuTorchTextLLMRunner.mm b/extension/llm/apple/ExecuTorchLLM/Exported/ExecuTorchTextLLMRunner.mm new file mode 100644 index 00000000000..dd57d25dcde --- /dev/null +++ b/extension/llm/apple/ExecuTorchLLM/Exported/ExecuTorchTextLLMRunner.mm @@ -0,0 +1,102 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + */ + +#import "ExecuTorchTextLLMRunner.h" + +#import + +using namespace executorch::extension; +using namespace executorch::runtime; + +NSErrorDomain const ExecuTorchTextLLMRunnerErrorDomain = @"ExecuTorchTextLLMRunnerErrorDomain"; + +@implementation ExecuTorchTextLLMRunner { + NSString *_modelPath; + NSString *_tokenizerPath; + std::unique_ptr> _specialTokens; + std::unique_ptr _runner; +} + +- (instancetype)initWithModelPath:(NSString*)modelPath + tokenizerPath:(NSString*)tokenizerPath + specialTokens:(NSArray*)tokens { + self = [super init]; + if (self) { + _modelPath = [modelPath copy]; + _tokenizerPath = [tokenizerPath copy]; + _specialTokens = std::make_unique>(); + for (NSString *token in tokens) { + _specialTokens->emplace_back(token.UTF8String); + } + } + return self; +} + +- (BOOL)isLoaded { + return _runner && _runner->is_loaded(); +} + +- (BOOL)loadWithError:(NSError**)error { + if (![self isLoaded]) { + _runner = llm::create_text_llm_runner( + _modelPath.UTF8String, + llm::load_tokenizer(_tokenizerPath.UTF8String, std::move(_specialTokens)) + ); + if (!_runner) { + if (error) { + *error = [NSError errorWithDomain:ExecuTorchTextLLMRunnerErrorDomain + code:-1 + userInfo:@{NSLocalizedDescriptionKey: @"Failed to create runner"}]; + } + return NO; + } + } + auto status = _runner->load(); + if (status != Error::Ok) { + if (error) { + *error = [NSError errorWithDomain:ExecuTorchTextLLMRunnerErrorDomain + code:(NSInteger)status + userInfo:nil]; + } + return NO; + } + return YES; +} + +- (BOOL)generate:(NSString*)prompt + sequenceLength:(NSInteger)seq_len +withTokenCallback:(nullable void (^)(NSString*))callback + error:(NSError**)error { + if (![self loadWithError:error]) { + return NO; + } + auto status = _runner->generate( + prompt.UTF8String, + llm::GenerationConfig{.seq_len = static_cast(seq_len)}, + [callback](const std::string& token) { + if (callback) callback(@(token.c_str())); + } + ); + if (status != Error::Ok) { + if (error) { + *error = [NSError errorWithDomain:ExecuTorchTextLLMRunnerErrorDomain + code:(NSInteger)status + userInfo:nil]; + } + return NO; + } + return YES; +} + +- (void)stop { + if (_runner) { + _runner->stop(); + } +} + +@end diff --git a/extension/llm/runner/text_llm_runner.cpp b/extension/llm/runner/text_llm_runner.cpp index b93988cffd5..cf55d98224a 100644 --- a/extension/llm/runner/text_llm_runner.cpp +++ b/extension/llm/runner/text_llm_runner.cpp @@ -12,6 +12,7 @@ #include #include +#include #include #include #include @@ -256,6 +257,7 @@ std::unique_ptr load_tokenizer( std::optional pattern, size_t bos_token_index, size_t eos_token_index) { + runtime::runtime_init(); auto json_tokenizer = std::make_unique(); if (json_tokenizer->load(tokenizer_path) == ::tokenizers::Error::Ok) { ET_LOG(Info, "Loaded json tokenizer");