@@ -80,7 +80,7 @@ class UNet2DConditionModel(ModelMixin, ConfigMixin, UNet2DConditionLoadersMixin,
8080 in_channels (`int`, *optional*, defaults to 4): Number of channels in the input sample.
8181 out_channels (`int`, *optional*, defaults to 4): Number of channels in the output.
8282 center_input_sample (`bool`, *optional*, defaults to `False`): Whether to center the input sample.
83- flip_sin_to_cos (`bool`, *optional*, defaults to `False `):
83+ flip_sin_to_cos (`bool`, *optional*, defaults to `True`):
8484 Whether to flip the sin to cos in the time embedding.
8585 freq_shift (`int`, *optional*, defaults to 0): The frequency shift to apply to the time embedding.
8686 down_block_types (`Tuple[str]`, *optional*, defaults to `("CrossAttnDownBlock2D", "CrossAttnDownBlock2D", "CrossAttnDownBlock2D", "DownBlock2D")`):
@@ -109,7 +109,7 @@ class UNet2DConditionModel(ModelMixin, ConfigMixin, UNet2DConditionLoadersMixin,
109109 The number of transformer blocks of type [`~models.attention.BasicTransformerBlock`]. Only relevant for
110110 [`~models.unet_2d_blocks.CrossAttnDownBlock2D`], [`~models.unet_2d_blocks.CrossAttnUpBlock2D`],
111111 [`~models.unet_2d_blocks.UNetMidBlock2DCrossAttn`].
112- reverse_transformer_layers_per_block : (`Tuple[Tuple]`, *optional*, defaults to None):
112+ reverse_transformer_layers_per_block (`Tuple[Tuple]`, *optional*, defaults to `None`):
113113 The number of transformer blocks of type [`~models.attention.BasicTransformerBlock`], in the upsampling
114114 blocks of the U-Net. Only relevant if `transformer_layers_per_block` is of type `Tuple[Tuple]` and for
115115 [`~models.unet_2d_blocks.CrossAttnDownBlock2D`], [`~models.unet_2d_blocks.CrossAttnUpBlock2D`],
@@ -147,9 +147,9 @@ class conditioning with `class_embed_type` equal to `None`.
147147 The second activation function to use in timestep embedding. Choose from `silu`, `mish` and `gelu`.
148148 time_cond_proj_dim (`int`, *optional*, defaults to `None`):
149149 The dimension of `cond_proj` layer in the timestep embedding.
150- conv_in_kernel (`int`, *optional*, default to `3`): The kernel size of `conv_in` layer. conv_out_kernel (`int`,
151- *optional*, default to `3`): The kernel size of `conv_out` layer. projection_class_embeddings_input_dim (`int`,
152- *optional*): The dimension of the `class_labels` input when
150+ conv_in_kernel (`int`, *optional*, defaults to `3`): The kernel size of `conv_in` layer.
151+ conv_out_kernel (`int`, *optional*, defaults to `3`): The kernel size of `conv_out` layer.
152+ projection_class_embeddings_input_dim (`int`, *optional*): The dimension of the `class_labels` input when
153153 `class_embed_type="projection"`. Required when `class_embed_type="projection"`.
154154 class_embeddings_concat (`bool`, *optional*, defaults to `False`): Whether to concatenate the time
155155 embeddings with the class embeddings.
0 commit comments