From fff6598ccaa443654ad60458231a03e21a2550d0 Mon Sep 17 00:00:00 2001
From: ariG23498
Date: Sun, 28 Apr 2024 18:02:43 +0530
Subject: [PATCH 1/5] chore: reducing model sizes

---
 tests/pipelines/amused/test_amused.py | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/tests/pipelines/amused/test_amused.py b/tests/pipelines/amused/test_amused.py
index f03751e2f830..1ec3551eaa30 100644
--- a/tests/pipelines/amused/test_amused.py
+++ b/tests/pipelines/amused/test_amused.py
@@ -38,17 +38,17 @@ class AmusedPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
     def get_dummy_components(self):
         torch.manual_seed(0)
         transformer = UVit2DModel(
-            hidden_size=32,
+            hidden_size=8,
             use_bias=False,
             hidden_dropout=0.0,
             cond_embed_dim=32,
             micro_cond_encode_dim=2,
             micro_cond_embed_dim=10,
-            encoder_hidden_size=32,
+            encoder_hidden_size=8,
             vocab_size=32,
             codebook_size=32,
             in_channels=32,
-            block_out_channels=32,
+            block_out_channels=8,
             num_res_blocks=1,
             downsample=True,
             upsample=True,
@@ -56,7 +56,7 @@ def get_dummy_components(self):
             num_hidden_layers=1,
             num_attention_heads=1,
             attention_dropout=0.0,
-            intermediate_size=32,
+            intermediate_size=8,
             layer_norm_eps=1e-06,
             ln_elementwise_affine=True,
         )
@@ -64,17 +64,17 @@ def get_dummy_components(self):
         torch.manual_seed(0)
         vqvae = VQModel(
             act_fn="silu",
-            block_out_channels=[32],
+            block_out_channels=[8],
             down_block_types=[
                 "DownEncoderBlock2D",
             ],
             in_channels=3,
-            latent_channels=32,
-            layers_per_block=2,
-            norm_num_groups=32,
+            latent_channels=8,
+            layers_per_block=1,
+            norm_num_groups=8,
             num_vq_embeddings=32,
             out_channels=3,
-            sample_size=32,
+            sample_size=8,
             up_block_types=[
                 "UpDecoderBlock2D",
             ],
@@ -85,11 +85,11 @@ def get_dummy_components(self):
         text_encoder_config = CLIPTextConfig(
             bos_token_id=0,
             eos_token_id=2,
-            hidden_size=32,
-            intermediate_size=64,
+            hidden_size=8,
+            intermediate_size=8,
             layer_norm_eps=1e-05,
-            num_attention_heads=8,
-            num_hidden_layers=3,
+            num_attention_heads=1,
+            num_hidden_layers=1,
             pad_token_id=1,
             vocab_size=1000,
             projection_dim=32,

From 136577d93c312514b543c8903dbe1135784980bc Mon Sep 17 00:00:00 2001
From: ariG23498
Date: Mon, 29 Apr 2024 17:36:17 +0530
Subject: [PATCH 2/5] chore: shrinks further

---
 tests/pipelines/amused/test_amused.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/pipelines/amused/test_amused.py b/tests/pipelines/amused/test_amused.py
index 1ec3551eaa30..e8c379879c2a 100644
--- a/tests/pipelines/amused/test_amused.py
+++ b/tests/pipelines/amused/test_amused.py
@@ -41,7 +41,7 @@ def get_dummy_components(self):
             hidden_size=8,
             use_bias=False,
             hidden_dropout=0.0,
-            cond_embed_dim=32,
+            cond_embed_dim=8,
             micro_cond_encode_dim=2,
             micro_cond_embed_dim=10,
             encoder_hidden_size=8,
@@ -92,7 +92,7 @@ def get_dummy_components(self):
             num_hidden_layers=1,
             pad_token_id=1,
             vocab_size=1000,
-            projection_dim=32,
+            projection_dim=8,
         )
         text_encoder = CLIPTextModelWithProjection(text_encoder_config)
         tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")

From ab4339c8f4acc8ee406ed55ea3ea03283014b5e5 Mon Sep 17 00:00:00 2001
From: ariG23498
Date: Mon, 29 Apr 2024 17:38:34 +0530
Subject: [PATCH 3/5] chore: shrinks further

---
 tests/pipelines/amused/test_amused.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tests/pipelines/amused/test_amused.py b/tests/pipelines/amused/test_amused.py
index e8c379879c2a..9a9e2551d642 100644
--- a/tests/pipelines/amused/test_amused.py
+++ b/tests/pipelines/amused/test_amused.py
@@ -46,8 +46,8 @@ def get_dummy_components(self):
             micro_cond_embed_dim=10,
             encoder_hidden_size=8,
             vocab_size=32,
-            codebook_size=32,
-            in_channels=32,
+            codebook_size=8,
+            in_channels=8,
             block_out_channels=8,
             num_res_blocks=1,
             downsample=True,
@@ -72,7 +72,7 @@ def get_dummy_components(self):
             latent_channels=8,
             layers_per_block=1,
             norm_num_groups=8,
-            num_vq_embeddings=32,
+            num_vq_embeddings=8,
             out_channels=3,
             sample_size=8,
             up_block_types=[

From 407009a4ba794d80ae1d0b43332b52c90bd3a7a2 Mon Sep 17 00:00:00 2001
From: ariG23498
Date: Mon, 29 Apr 2024 18:16:48 +0530
Subject: [PATCH 4/5] chore: shrinking model for img2img pipeline

---
 tests/pipelines/amused/test_amused_img2img.py | 36 +++++++++----------
 1 file changed, 18 insertions(+), 18 deletions(-)

diff --git a/tests/pipelines/amused/test_amused_img2img.py b/tests/pipelines/amused/test_amused_img2img.py
index efbca1f437a4..24bc34d330e9 100644
--- a/tests/pipelines/amused/test_amused_img2img.py
+++ b/tests/pipelines/amused/test_amused_img2img.py
@@ -42,17 +42,17 @@ class AmusedImg2ImgPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
     def get_dummy_components(self):
         torch.manual_seed(0)
         transformer = UVit2DModel(
-            hidden_size=32,
+            hidden_size=8,
             use_bias=False,
             hidden_dropout=0.0,
-            cond_embed_dim=32,
+            cond_embed_dim=8,
             micro_cond_encode_dim=2,
             micro_cond_embed_dim=10,
-            encoder_hidden_size=32,
+            encoder_hidden_size=8,
             vocab_size=32,
-            codebook_size=32,
-            in_channels=32,
-            block_out_channels=32,
+            codebook_size=8,
+            in_channels=8,
+            block_out_channels=8,
             num_res_blocks=1,
             downsample=True,
             upsample=True,
@@ -60,7 +60,7 @@ def get_dummy_components(self):
             num_hidden_layers=1,
             num_attention_heads=1,
             attention_dropout=0.0,
-            intermediate_size=32,
+            intermediate_size=8,
             layer_norm_eps=1e-06,
             ln_elementwise_affine=True,
         )
@@ -68,17 +68,17 @@ def get_dummy_components(self):
         torch.manual_seed(0)
         vqvae = VQModel(
             act_fn="silu",
-            block_out_channels=[32],
+            block_out_channels=[8],
             down_block_types=[
                 "DownEncoderBlock2D",
             ],
             in_channels=3,
-            latent_channels=32,
-            layers_per_block=2,
-            norm_num_groups=32,
-            num_vq_embeddings=32,
+            latent_channels=8,
+            layers_per_block=1,
+            norm_num_groups=8,
+            num_vq_embeddings=32,  # reducing this to 16 or 8 -> RuntimeError: "cdist_cuda" not implemented for 'Half'
             out_channels=3,
-            sample_size=32,
+            sample_size=8,
             up_block_types=[
                 "UpDecoderBlock2D",
             ],
@@ -89,14 +89,14 @@ def get_dummy_components(self):
         text_encoder_config = CLIPTextConfig(
             bos_token_id=0,
             eos_token_id=2,
-            hidden_size=32,
-            intermediate_size=64,
+            hidden_size=8,
+            intermediate_size=8,
             layer_norm_eps=1e-05,
-            num_attention_heads=8,
-            num_hidden_layers=3,
+            num_attention_heads=1,
+            num_hidden_layers=1,
             pad_token_id=1,
             vocab_size=1000,
-            projection_dim=32,
+            projection_dim=8,
         )
         text_encoder = CLIPTextModelWithProjection(text_encoder_config)
         tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")

From 2eb50fe51c9b6d62be805a9c714371c9d404dfbd Mon Sep 17 00:00:00 2001
From: ariG23498
Date: Mon, 29 Apr 2024 18:26:33 +0530
Subject: [PATCH 5/5] chore: reducing size of model for inpaint pipeline

---
 tests/pipelines/amused/test_amused_inpaint.py | 36 +++++++++----------
 1 file changed, 18 insertions(+), 18 deletions(-)

diff --git a/tests/pipelines/amused/test_amused_inpaint.py b/tests/pipelines/amused/test_amused_inpaint.py
index d397f8d81297..d0c1ed09c706 100644
--- a/tests/pipelines/amused/test_amused_inpaint.py
+++ b/tests/pipelines/amused/test_amused_inpaint.py
@@ -42,17 +42,17 @@ class AmusedInpaintPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
     def get_dummy_components(self):
         torch.manual_seed(0)
         transformer = UVit2DModel(
-            hidden_size=32,
+            hidden_size=8,
             use_bias=False,
             hidden_dropout=0.0,
-            cond_embed_dim=32,
+            cond_embed_dim=8,
             micro_cond_encode_dim=2,
             micro_cond_embed_dim=10,
-            encoder_hidden_size=32,
+            encoder_hidden_size=8,
             vocab_size=32,
-            codebook_size=32,
-            in_channels=32,
-            block_out_channels=32,
+            codebook_size=32,  # codebook size needs to be consistent with num_vq_embeddings for inpaint tests
+            in_channels=8,
+            block_out_channels=8,
             num_res_blocks=1,
             downsample=True,
             upsample=True,
@@ -60,7 +60,7 @@ def get_dummy_components(self):
             num_hidden_layers=1,
             num_attention_heads=1,
             attention_dropout=0.0,
-            intermediate_size=32,
+            intermediate_size=8,
             layer_norm_eps=1e-06,
             ln_elementwise_affine=True,
         )
@@ -68,17 +68,17 @@ def get_dummy_components(self):
         torch.manual_seed(0)
         vqvae = VQModel(
             act_fn="silu",
-            block_out_channels=[32],
+            block_out_channels=[8],
             down_block_types=[
                 "DownEncoderBlock2D",
             ],
             in_channels=3,
-            latent_channels=32,
-            layers_per_block=2,
-            norm_num_groups=32,
-            num_vq_embeddings=32,
+            latent_channels=8,
+            layers_per_block=1,
+            norm_num_groups=8,
+            num_vq_embeddings=32,  # reducing this to 16 or 8 -> RuntimeError: "cdist_cuda" not implemented for 'Half'
             out_channels=3,
-            sample_size=32,
+            sample_size=8,
             up_block_types=[
                 "UpDecoderBlock2D",
             ],
@@ -89,14 +89,14 @@ def get_dummy_components(self):
         text_encoder_config = CLIPTextConfig(
             bos_token_id=0,
             eos_token_id=2,
-            hidden_size=32,
-            intermediate_size=64,
+            hidden_size=8,
+            intermediate_size=8,
             layer_norm_eps=1e-05,
-            num_attention_heads=8,
-            num_hidden_layers=3,
+            num_attention_heads=1,
+            num_hidden_layers=1,
             pad_token_id=1,
             vocab_size=1000,
-            projection_dim=32,
+            projection_dim=8,
         )
         text_encoder = CLIPTextModelWithProjection(text_encoder_config)
         tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")