
Commit eef2a99

[Executorch][llama] Renamed quantized_kv_cache to custom_kv_cache
Because the old name was a misnomer.

Differential Revision: [D71833067](https://our.internmc.facebook.com/intern/diff/D71833067/)

ghstack-source-id: 276640305
Pull Request resolved: #9944
1 parent 88944a8 commit eef2a99
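
Only the module path changes with this rename; the transform functions and cache classes keep their names. Below is a minimal, illustrative sketch of an import updated to the new path (all imported names are taken from the diffs that follow; the commented usage line and the `model` variable are hypothetical):

# Import the KV-cache source transforms from the renamed module.
from executorch.examples.models.llama.source_transformation.custom_kv_cache import (
    CustomKVCache,
    QuantizedCacheType,
    QuantizedKVCache,
    replace_kv_cache_with_custom_kv_cache,
    replace_kv_cache_with_quantized_kv_cache,
)

# Hypothetical usage: swap the eager KVCache modules for the custom
# implementation before export.
# model = replace_kv_cache_with_custom_kv_cache(model)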

File tree

7 files changed: +15 −15 lines changed

examples/models/llama/TARGETS

Lines changed: 5 additions & 5 deletions
@@ -108,7 +108,7 @@ runtime.python_library(
         "source_transformation/pre_quantization.py",
         "source_transformation/prune_vocab.py",
         "source_transformation/quantize.py",
-        "source_transformation/quantized_kv_cache.py",
+        "source_transformation/custom_kv_cache.py",
         "source_transformation/rms_norm.py",
         "source_transformation/rope.py",
         "source_transformation/sdpa.py",
@@ -208,9 +208,9 @@ runtime.python_library(
 )
 
 runtime.python_library(
-    name = "quantized_kv_cache",
+    name = "custom_kv_cache",
     srcs = [
-        "source_transformation/quantized_kv_cache.py",
+        "source_transformation/custom_kv_cache.py",
     ],
     _is_external_target = True,
     visibility = ["//executorch/..."],
@@ -240,7 +240,7 @@ runtime.python_test(
         "//executorch/extension/llm/custom_ops:custom_ops_aot_lib",
     ],
     deps = [
-        ":quantized_kv_cache",
+        ":custom_kv_cache",
         "//caffe2:torch",
         "//executorch/examples/models/llama:llama_transformer",
     ],
@@ -255,7 +255,7 @@ runtime.python_test(
         "//executorch/extension/llm/custom_ops:custom_ops_aot_lib",
     ],
     deps = [
-        ":quantized_kv_cache",
+        ":custom_kv_cache",
         ":sdpa",
         "//caffe2:torch",
         "//executorch/examples/models/llama:llama_transformer",

examples/models/llama/export_llama_lib.py

Lines changed: 4 additions & 4 deletions
@@ -59,14 +59,14 @@
 )
 
 from .source_transformation.attention import replace_attention_to_attention_sha
+from .source_transformation.custom_kv_cache import (
+    replace_kv_cache_with_custom_kv_cache,
+    replace_kv_cache_with_quantized_kv_cache,
+)
 from .source_transformation.quantize import (
     get_quant_embedding_transform,
     get_quant_weight_transform,
 )
-from .source_transformation.quantized_kv_cache import (
-    replace_kv_cache_with_custom_kv_cache,
-    replace_kv_cache_with_quantized_kv_cache,
-)
 from .source_transformation.rms_norm import replace_rms_norm_with_native_rms_norm
 
 from .source_transformation.rope import materialze_broadcast_of_rope_freq_cis

examples/models/llama/source_transformation/test_quantized_kv_cache.py

Lines changed: 1 addition & 1 deletion
@@ -10,7 +10,7 @@
 
 from executorch.examples.models.llama.attention import KVCache
 
-from executorch.examples.models.llama.source_transformation.quantized_kv_cache import (
+from executorch.examples.models.llama.source_transformation.custom_kv_cache import (
     QuantizedCacheType,
     QuantizedKVCache,
 )

examples/models/llama/source_transformation/test_sdpa_with_quantized_kv_cache.py

Lines changed: 1 addition & 1 deletion
@@ -10,7 +10,7 @@
 
 from executorch.examples.models.llama.attention import KVCache
 
-from executorch.examples.models.llama.source_transformation.quantized_kv_cache import (
+from executorch.examples.models.llama.source_transformation.custom_kv_cache import (
     CustomKVCache,
     QuantizedCacheType,
     QuantizedKVCache,

examples/models/llava/export_llava.py

Lines changed: 3 additions & 3 deletions
@@ -20,13 +20,13 @@
     build_args_parser,
     get_quantizer_and_quant_params,
 )
+from executorch.examples.models.llama.source_transformation.custom_kv_cache import (
+    replace_kv_cache_with_custom_kv_cache,
+)
 from executorch.examples.models.llama.source_transformation.quantize import (
     EmbeddingQuantHandler,
     get_quant_weight_transform,
 )
-from executorch.examples.models.llama.source_transformation.quantized_kv_cache import (
-    replace_kv_cache_with_custom_kv_cache,
-)
 from executorch.examples.models.llama.source_transformation.sdpa import (
     replace_sdpa_with_custom_op,
 )

examples/models/llava/model.py

Lines changed: 1 addition & 1 deletion
@@ -15,7 +15,7 @@
 from executorch.examples.models.llama.llama_transformer import Transformer
 from executorch.examples.models.llama.model_args import ModelArgs
 
-from executorch.examples.models.llama.source_transformation.quantized_kv_cache import (
+from executorch.examples.models.llama.source_transformation.custom_kv_cache import (
     replace_kv_cache_with_custom_kv_cache,
 )
 from executorch.examples.models.llama.source_transformation.sdpa import (
