Skip to content

Commit aef0b8b

Browse files
Authored merge commit aef0b8b: merged branch 'main' into nlp/decode-yield-eot-token (2 parents: 2ab9092 + 42c52bf).

File tree

4 files changed

+9
-21
lines changed

4 files changed

+9
-21
lines changed

install/install_requirements.sh

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -51,13 +51,13 @@ echo "Using pip executable: $PIP_EXECUTABLE"
5151
# NOTE: If a newly-fetched version of the executorch repo changes the value of
5252
# PYTORCH_NIGHTLY_VERSION, you should re-run this script to install the necessary
5353
# package versions.
54-
PYTORCH_NIGHTLY_VERSION=dev20250119
54+
PYTORCH_NIGHTLY_VERSION=dev20250124
5555

5656
# Nightly version for torchvision
57-
VISION_NIGHTLY_VERSION=dev20250119
57+
VISION_NIGHTLY_VERSION=dev20250124
5858

5959
# Nightly version for torchtune
60-
TUNE_NIGHTLY_VERSION=dev20250119
60+
TUNE_NIGHTLY_VERSION=dev20250124
6161

6262
# The pip repository that hosts nightly torch packages. cpu by default.
6363
# If cuda is available, based on presence of nvidia-smi, install the pytorch nightly

torchchat/generate.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1193,6 +1193,8 @@ def callback(x, *, done_generating=False):
11931193
max_seq_length=max_seq_length,
11941194
attention_backend=self.builder_args.attention_backend,
11951195
)
1196+
if generator_args.chat_mode:
1197+
start_pos += encoded.size(0)
11961198
for token_tensor, metrics in generator_func:
11971199
if token_tensor is not None:
11981200
start_pos += token_tensor.size(0)

torchchat/model.py

Lines changed: 3 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -657,7 +657,7 @@ def __init__(self, config: TransformerArgs) -> None:
657657
self.layers[str(layer_id)] = TransformerBlock(config)
658658

659659
if config.stage_idx == config.n_stages - 1:
660-
self.norm = RMSNorm(config.dim, eps=config.norm_eps)
660+
self.norm = nn.RMSNorm(config.dim, eps=config.norm_eps)
661661
self.output = nn.Linear(config.dim, config.vocab_size, bias=False)
662662
if config.tie_word_embeddings:
663663
self.output.weight = self.tok_embeddings.weight
@@ -751,8 +751,8 @@ def __init__(self, config: TransformerArgs) -> None:
751751
super().__init__()
752752
self.attention = Attention(config)
753753
self.feed_forward = FeedForward(config)
754-
self.ffn_norm = RMSNorm(config.dim, config.norm_eps)
755-
self.attention_norm = RMSNorm(config.dim, config.norm_eps)
754+
self.ffn_norm = nn.RMSNorm(config.dim, config.norm_eps)
755+
self.attention_norm = nn.RMSNorm(config.dim, config.norm_eps)
756756
# None for llama architecture, set for granite architectures
757757
self.residual_multiplier = (
758758
config.residual_multiplier
@@ -928,20 +928,6 @@ def forward(self, x: Tensor) -> Tensor:
928928
return self.w2(F.silu(self.w1(x)) * self.w3(x))
929929

930930

931-
class RMSNorm(nn.Module):
932-
def __init__(self, dim: int, eps: float = 1e-5):
933-
super().__init__()
934-
self.eps = eps
935-
self.weight = nn.Parameter(torch.ones(dim))
936-
937-
def _norm(self, x):
938-
return x * torch.rsqrt(torch.mean(x * x, dim=-1, keepdim=True) + self.eps)
939-
940-
def forward(self, x: Tensor) -> Tensor:
941-
output = self._norm(x.float()).type_as(x)
942-
return output * self.weight
943-
944-
945931
def apply_scaling(freqs: torch.Tensor, rope_scaling: Dict[str, Any]):
946932
# Check for the presence of the required keys
947933
required_keys = {

torchchat/utils/scripts/install_utils.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,7 @@ install_executorch_python_libs() {
8888
echo "Building and installing python libraries"
8989
if [ "${ENABLE_ET_PYBIND}" = false ]; then
9090
echo "Not installing pybind"
91-
bash ./install_requirements.sh
91+
bash ./install_requirements.sh --pybind off
9292
else
9393
echo "Installing pybind"
9494
bash ./install_requirements.sh --pybind xnnpack

0 commit comments

Comments (0)