Skip to content

Commit 9f583d9

Browse files
author
yiyixuxu
committed
more
1 parent 62bceb7 commit 9f583d9

File tree

3 files changed

+6
-5
lines changed

3 files changed

+6
-5
lines changed

src/diffusers/loaders/unet.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -22,7 +22,7 @@
2222
from huggingface_hub.utils import validate_hf_hub_args
2323
from torch import nn
2424

25-
from ..models.embeddings import ImageProjection, MLPProjection, IPAdapterPlusImageProjection
25+
from ..models.embeddings import ImageProjection, IPAdapterFullImageProjection, IPAdapterPlusImageProjection
2626
from ..models.modeling_utils import _LOW_CPU_MEM_USAGE_DEFAULT, load_model_dict_into_meta
2727
from ..utils import (
2828
USE_PEFT_BACKEND,
@@ -689,7 +689,7 @@ def _convert_ip_adapter_image_proj_to_diffusers(self, state_dict):
689689
clip_embeddings_dim = state_dict["proj.0.weight"].shape[0]
690690
cross_attention_dim = state_dict["proj.3.weight"].shape[0]
691691

692-
image_projection = MLPProjection(
692+
image_projection = IPAdapterFullImageProjection(
693693
cross_attention_dim=cross_attention_dim, image_embed_dim=clip_embeddings_dim
694694
)
695695

@@ -707,7 +707,7 @@ def _convert_ip_adapter_image_proj_to_diffusers(self, state_dict):
707707
hidden_dims = state_dict["latents"].shape[2]
708708
heads = state_dict["layers.0.0.to_q.weight"].shape[0] // 64
709709

710-
image_projection = Resampler(
710+
image_projection = IPAdapterPlusImageProjection(
711711
embed_dims=embed_dims,
712712
output_dims=output_dims,
713713
hidden_dims=hidden_dims,

src/diffusers/models/embeddings.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -461,7 +461,7 @@ def forward(self, image_embeds: torch.FloatTensor):
461461
return image_embeds
462462

463463

464-
class MLPProjection(nn.Module):
464+
class IPAdapterFullImageProjection(nn.Module):
465465
def __init__(self, image_embed_dim=1024, cross_attention_dim=1024):
466466
super().__init__()
467467
from .attention import FeedForward
@@ -640,6 +640,7 @@ def get_fourier_embeds_from_boundingbox(embed_dim, box):
640640

641641
return emb
642642

643+
643644
class GLIGENTextBoundingboxProjection(nn.Module):
644645
def __init__(self, positive_len, out_dim, feature_type="text-only", fourier_freqs=8):
645646
super().__init__()

src/diffusers/models/unet_2d_condition.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -32,10 +32,10 @@
3232
)
3333
from .embeddings import (
3434
GaussianFourierProjection,
35+
GLIGENTextBoundingboxProjection,
3536
ImageHintTimeEmbedding,
3637
ImageProjection,
3738
ImageTimeEmbedding,
38-
GLIGENTextBoundingboxProjection,
3939
TextImageProjection,
4040
TextImageTimeEmbedding,
4141
TextTimeEmbedding,

0 commit comments

Comments
 (0)