diff --git a/examples/apple/coreml/llama/TARGETS b/examples/apple/coreml/llama/TARGETS
new file mode 100644
index 00000000000..87ad47fbf6d
--- /dev/null
+++ b/examples/apple/coreml/llama/TARGETS
@@ -0,0 +1,66 @@
+# Any targets that should be shared between fbcode and xplat must be defined in
+# targets.bzl. This file can contain fbcode-only targets.
+
+load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime")
+
+runtime.python_library(
+    name = "llama_transformer",
+    srcs = [
+        "llama_transformer.py",
+    ],
+    _is_external_target = True,
+    base_module = "executorch.examples.apple.coreml.llama",
+    visibility = [
+        "//executorch/...",
+        "@EXECUTORCH_CLIENTS",
+    ],
+    deps = [
+        "//caffe2:torch",
+        "//executorch/examples/models/llama:llama_transformer",
+    ],
+)
+
+runtime.python_library(
+    name = "utils",
+    srcs = [
+        "utils.py",
+    ],
+    _is_external_target = True,
+    base_module = "executorch.examples.apple.coreml.llama",
+    visibility = [
+        "//executorch/...",
+        "@EXECUTORCH_CLIENTS",
+    ],
+    deps = [
+        "//caffe2:torch",
+    ],
+)
+
+runtime.python_binary(
+    name = "export",
+    srcs = [
+        "export.py",
+    ],
+    main_function = "executorch.examples.apple.coreml.llama.export.main",
+    visibility = [
+        "//executorch/...",
+        "@EXECUTORCH_CLIENTS",
+    ],
+    deps = [
+        "fbsource//third-party/pypi/coremltools:coremltools",
+        ":llama_transformer",
+        ":utils",
+        "//caffe2:torch",
+        "//executorch/backends/apple/coreml:backend",
+        "//executorch/backends/apple/coreml:partitioner",
+        "//executorch/examples/models/llama:source_transformation",
+        "//executorch/exir/backend:utils",
+        "//executorch/exir/capture:config",
+        "//executorch/exir/passes:lib",
+        "//executorch/exir/passes:quant_fusion_pass",
+        "//executorch/exir/passes:sym_shape_eval_pass",
+        "//executorch/exir/program:program",
+        "//executorch/extension/export_util:export_util",
+        "//executorch/extension/llm/export:export_lib",
+    ],
+)
diff --git a/examples/apple/coreml/llama/export.py b/examples/apple/coreml/llama/export.py
index f440dc878d4..b39162c484b 100644
--- a/examples/apple/coreml/llama/export.py
+++ b/examples/apple/coreml/llama/export.py
@@ -6,12 +6,18 @@
 
 import argparse
 
-import sys
-
 import coremltools as ct
 import torch
 from executorch.backends.apple.coreml.compiler import CoreMLBackend  # pyre-ignore
 from executorch.backends.apple.coreml.partition import CoreMLPartitioner  # pyre-ignore
+
+from executorch.examples.apple.coreml.llama.llama_transformer import (
+    InputManager,
+    load_model,
+)
+from executorch.examples.apple.coreml.llama.utils import (
+    replace_linear_with_split_linear,
+)
 from executorch.examples.models.llama.source_transformation.quantize import (
     EmbeddingQuantHandler,
 )
@@ -24,10 +30,6 @@
 from executorch.exir.program._program import to_edge_with_preserved_ops
 from executorch.extension.export_util.utils import save_pte_program
 
-sys.path.insert(0, ".")
-from llama_transformer import InputManager, load_model
-from utils import replace_linear_with_split_linear
-
 
 def main() -> None:
     parser = argparse.ArgumentParser()
diff --git a/examples/apple/coreml/llama/llama_transformer.py b/examples/apple/coreml/llama/llama_transformer.py
index 3c371da4c00..324f4aa1f2e 100644
--- a/examples/apple/coreml/llama/llama_transformer.py
+++ b/examples/apple/coreml/llama/llama_transformer.py
@@ -443,7 +443,7 @@ def forward(
         if not self.use_cache_list:
             k_out = torch.stack(k_out, dim=0)
             v_out = torch.stack(v_out, dim=0)
-        return logits, k_out, v_out
+        return logits, k_out, v_out  # pyre-ignore[7]
 
 
 def load_model(checkpoint_path, params_path, max_seq_length, use_cache_list):
@@ -614,7 +614,7 @@ def get_inputs(self, tokens: List[int]):
                     torch.tensor(tokens, dtype=torch.int64),
                     torch.zeros(self.seq_length - input_length, dtype=torch.int64),
                 ],
-                axis=-1,
+                dim=-1,
             ).reshape(1, -1),
             # input_pos
             torch.tensor([self.input_pos], dtype=torch.long),
diff --git a/examples/apple/coreml/llama/run.py b/examples/apple/coreml/llama/run.py
index de22794dee1..e68471a1d29 100644
--- a/examples/apple/coreml/llama/run.py
+++ b/examples/apple/coreml/llama/run.py
@@ -5,19 +5,19 @@
 # LICENSE file in the root directory of this source tree.
 
 import argparse
-import sys
 
 import sentencepiece as spm
 import torch
+from executorch.examples.apple.coreml.llama.llama_transformer import (
+    InputManager,
+    load_model,
+)
 
-from executorch.runtime import Runtime
-
-
-sys.path.insert(0, ".")
 from executorch.examples.models.llama.runner.generation import next_token
 from executorch.examples.models.llama.tokenizer import tiktoken
-from llama_transformer import InputManager, load_model
+
+from executorch.runtime import Runtime
 
 
 class Tokenizer:
diff --git a/pyproject.toml b/pyproject.toml
index a7244133063..f81f286be2c 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -92,6 +92,7 @@ flatc = "executorch.data.bin:flatc"
 # TODO(mnachin T180504136): Do not put examples/models
 # into core pip packages. Refactor out the necessary utils
 # or core models files into a separate package.
+"executorch.examples.apple.coreml.llama" = "examples/apple/coreml/llama"
 "executorch.examples.models" = "examples/models"
 "executorch.exir" = "exir"
 "executorch.extension" = "extension"