From 0b6d2de792580b9ca40c57fd846824dafe408c3a Mon Sep 17 00:00:00 2001
From: CharlesCNorton <135471798+CharlesCNorton@users.noreply.github.com>
Date: Fri, 10 Jan 2025 19:56:03 -0500
Subject: [PATCH] Update prepare_dataset.py

fix: correct misspelling of "embedding" from "embeding" in three locations.

- Replaces all instances of "t5_embeding_max_length" with the correctly
  spelled "t5_embedding_max_length."
---
 .../models/diffusion/nemo/post_training/prepare_dataset.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/cosmos1/models/diffusion/nemo/post_training/prepare_dataset.py b/cosmos1/models/diffusion/nemo/post_training/prepare_dataset.py
index 7b4e85a4..48892010 100644
--- a/cosmos1/models/diffusion/nemo/post_training/prepare_dataset.py
+++ b/cosmos1/models/diffusion/nemo/post_training/prepare_dataset.py
@@ -108,7 +108,7 @@ def main(args):
     vae = init_video_tokenizer(args.tokenizer_dir)
 
     # Constants
-    t5_embeding_max_length = 512
+    t5_embedding_max_length = 512
     chunk_duration = vae.video_vae.pixel_chunk_duration  # Frames per chunk
     cnt = 0  # File index
 
@@ -153,9 +153,9 @@ def main(args):
         out = encode_for_batch(tokenizer, text_encoder, [args.prompt])[0]
         encoded_text = torch.tensor(out, dtype=torch.bfloat16)
 
-        # Pad T5 embedding to t5_embeding_max_length
+        # Pad T5 embedding to t5_embedding_max_length
         L, C_ = encoded_text.shape
-        t5_embed = torch.zeros(1, t5_embeding_max_length, C_, dtype=torch.bfloat16)
+        t5_embed = torch.zeros(1, t5_embedding_max_length, C_, dtype=torch.bfloat16)
         t5_embed[0, :L] = encoded_text
 
         # Save data to folder
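
Note for context: the renamed constant is the fixed target length (512) that each per-prompt T5 text embedding is zero-padded to before the data is saved. A minimal standalone sketch of that padding step, assuming PyTorch and a (seq_len, channels) bfloat16 tensor like the one the encoder returns; the sizes and variable names below are illustrative and not taken from the repository:

import torch

t5_embedding_max_length = 512  # fixed target length, matching the patched file

# Stand-in for the encoder output: a (seq_len, channels) bfloat16 tensor.
seq_len, channels = 77, 1024  # illustrative sizes only
encoded_text = torch.randn(seq_len, channels).to(torch.bfloat16)

# Allocate a zero tensor of the full target length and copy the real
# embedding into its leading rows; the remaining rows stay as zero padding.
t5_embed = torch.zeros(1, t5_embedding_max_length, channels, dtype=torch.bfloat16)
t5_embed[0, :seq_len] = encoded_text

assert t5_embed.shape == (1, t5_embedding_max_length, channels)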