Ruff: apply same rules as in transformers #2827

Merged: 5 commits, merged on Mar 27, 2023

examples/community/checkpoint_merger.py (20 changes: 8 additions & 12 deletions)

@@ -199,24 +199,20 @@ def merge(self, pretrained_model_name_or_path_list: List[Union[str, os.PathLike]
if not attr.startswith("_"):
checkpoint_path_1 = os.path.join(cached_folders[1], attr)
if os.path.exists(checkpoint_path_1):
files = list(
(
*glob.glob(os.path.join(checkpoint_path_1, "*.safetensors")),
*glob.glob(os.path.join(checkpoint_path_1, "*.bin")),
)
)
files = [
*glob.glob(os.path.join(checkpoint_path_1, "*.safetensors")),
*glob.glob(os.path.join(checkpoint_path_1, "*.bin")),
]
checkpoint_path_1 = files[0] if len(files) > 0 else None
if len(cached_folders) < 3:
checkpoint_path_2 = None
else:
checkpoint_path_2 = os.path.join(cached_folders[2], attr)
if os.path.exists(checkpoint_path_2):
files = list(
(
*glob.glob(os.path.join(checkpoint_path_2, "*.safetensors")),
*glob.glob(os.path.join(checkpoint_path_2, "*.bin")),
)
)
files = [
*glob.glob(os.path.join(checkpoint_path_2, "*.safetensors")),
*glob.glob(os.path.join(checkpoint_path_2, "*.bin")),
]
checkpoint_path_2 = files[0] if len(files) > 0 else None
# For an attr if both checkpoint_path_1 and 2 are None, ignore.
# If atleast one is present, deal with it according to interp method, of course only if the state_dict keys match.
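
Note: the rewrite above drops a redundant list(tuple(...)) wrapper in favour of a plain list literal, which is the style the newly enabled flake8-comprehensions rules prefer (see the pyproject.toml change below). A minimal standalone sketch of the equivalence, using a hypothetical folder path:

import glob
import os

# Hypothetical checkpoint folder, used only for illustration.
checkpoint_path = os.path.join("cached_folder", "unet")

# Old style: unpack into a tuple, then wrap it in list(...).
files_old = list(
    (
        *glob.glob(os.path.join(checkpoint_path, "*.safetensors")),
        *glob.glob(os.path.join(checkpoint_path, "*.bin")),
    )
)

# New style (as in the diff): build the list literal directly.
files_new = [
    *glob.glob(os.path.join(checkpoint_path, "*.safetensors")),
    *glob.glob(os.path.join(checkpoint_path, "*.bin")),
]

assert files_old == files_new  # identical contents, one fewer call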

examples/community/imagic_stable_diffusion.py (2 changes: 1 addition & 1 deletion)

@@ -48,7 +48,7 @@

def preprocess(image):
w, h = image.size
w, h = map(lambda x: x - x % 32, (w, h)) # resize to integer multiple of 32
w, h = (x - x % 32 for x in (w, h)) # resize to integer multiple of 32
image = image.resize((w, h), resample=PIL_INTERPOLATION["lanczos"])
image = np.array(image).astype(np.float32) / 255.0
image = image[None].transpose(0, 3, 1, 2)
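
Note: this and the matching changes in the two lpw_stable_diffusion pipelines below swap map with a lambda for a generator expression, which I take to be flake8-comprehensions rule C417. Both forms round each dimension down to the nearest multiple of 32; a small self-contained check:

# Both forms round (w, h) down to the nearest multiple of 32 before resizing.
w, h = 513, 769

w_old, h_old = map(lambda x: x - x % 32, (w, h))  # old style
w_new, h_new = (x - x % 32 for x in (w, h))       # new style, as in the diff

assert (w_old, h_old) == (w_new, h_new) == (512, 768)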

examples/community/lpw_stable_diffusion.py (4 changes: 2 additions & 2 deletions)

@@ -376,7 +376,7 @@ def get_weighted_text_embeddings(

def preprocess_image(image):
w, h = image.size
w, h = map(lambda x: x - x % 32, (w, h)) # resize to integer multiple of 32
w, h = (x - x % 32 for x in (w, h)) # resize to integer multiple of 32
image = image.resize((w, h), resample=PIL_INTERPOLATION["lanczos"])
image = np.array(image).astype(np.float32) / 255.0
image = image[None].transpose(0, 3, 1, 2)
@@ -387,7 +387,7 @@ def preprocess_image(image):
def preprocess_mask(mask, scale_factor=8):
mask = mask.convert("L")
w, h = mask.size
w, h = map(lambda x: x - x % 32, (w, h)) # resize to integer multiple of 32
w, h = (x - x % 32 for x in (w, h)) # resize to integer multiple of 32
mask = mask.resize((w // scale_factor, h // scale_factor), resample=PIL_INTERPOLATION["nearest"])
mask = np.array(mask).astype(np.float32) / 255.0
mask = np.tile(mask, (4, 1, 1))

examples/community/lpw_stable_diffusion_onnx.py (4 changes: 2 additions & 2 deletions)

@@ -403,7 +403,7 @@ def get_weighted_text_embeddings(

def preprocess_image(image):
w, h = image.size
w, h = map(lambda x: x - x % 32, (w, h)) # resize to integer multiple of 32
w, h = (x - x % 32 for x in (w, h)) # resize to integer multiple of 32
image = image.resize((w, h), resample=PIL_INTERPOLATION["lanczos"])
image = np.array(image).astype(np.float32) / 255.0
image = image[None].transpose(0, 3, 1, 2)
@@ -413,7 +413,7 @@ def preprocess_image(image):
def preprocess_mask(mask, scale_factor=8):
mask = mask.convert("L")
w, h = mask.size
w, h = map(lambda x: x - x % 32, (w, h)) # resize to integer multiple of 32
w, h = (x - x % 32 for x in (w, h)) # resize to integer multiple of 32
mask = mask.resize((w // scale_factor, h // scale_factor), resample=PIL_INTERPOLATION["nearest"])
mask = np.array(mask).astype(np.float32) / 255.0
mask = np.tile(mask, (4, 1, 1))

examples/community/stable_unclip.py (2 changes: 1 addition & 1 deletion)

@@ -46,7 +46,7 @@ def __init__(
):
super().__init__()

decoder_pipe_kwargs = dict(image_encoder=None) if decoder_pipe_kwargs is None else decoder_pipe_kwargs
decoder_pipe_kwargs = {"image_encoder": None} if decoder_pipe_kwargs is None else decoder_pipe_kwargs

decoder_pipe_kwargs["torch_dtype"] = decoder_pipe_kwargs.get("torch_dtype", None) or prior.dtype
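
Note: dict(image_encoder=None) becomes a dict literal, which appears to be flake8-comprehensions rule C408 (unnecessary dict call with keyword arguments). A tiny equivalence check; the same rewrite is applied to the config dictionaries in examples/rl/run_diffuser_locomotion.py and the conversion scripts further down:

# The two spellings build the same dictionary; the literal avoids a function call.
assert dict(image_encoder=None) == {"image_encoder": None}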


examples/instruct_pix2pix/train_instruct_pix2pix.py (2 changes: 1 addition & 1 deletion)

@@ -673,7 +673,7 @@ def preprocess_train(examples):
examples["edited_pixel_values"] = edited_images

# Preprocess the captions.
captions = [caption for caption in examples[edit_prompt_column]]
captions = list(examples[edit_prompt_column])
examples["input_ids"] = tokenize_captions(captions)
return examples
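
Note: a comprehension that only copies its iterable is replaced by list(...), which I read as flake8-comprehensions rule C416. A sketch with hypothetical captions standing in for examples[edit_prompt_column]:

# Hypothetical edit prompts, used only for illustration.
edit_prompts = ("make it a watercolor", "add a red hat")

captions_old = [caption for caption in edit_prompts]  # old style
captions_new = list(edit_prompts)                     # new style, same result

assert captions_old == captions_new == ["make it a watercolor", "add a red hat"]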


examples/rl/run_diffuser_locomotion.py (22 changes: 11 additions & 11 deletions)

@@ -4,17 +4,17 @@
from diffusers.experimental import ValueGuidedRLPipeline


config = dict(
n_samples=64,
horizon=32,
num_inference_steps=20,
n_guide_steps=2, # can set to 0 for faster sampling, does not use value network
scale_grad_by_std=True,
scale=0.1,
eta=0.0,
t_grad_cutoff=2,
device="cpu",
)
config = {
"n_samples": 64,
"horizon": 32,
"num_inference_steps": 20,
"n_guide_steps": 2, # can set to 0 for faster sampling, does not use value network
"scale_grad_by_std": True,
"scale": 0.1,
"eta": 0.0,
"t_grad_cutoff": 2,
"device": "cpu",
}


if __name__ == "__main__":

pyproject.toml (4 changes: 2 additions & 2 deletions)

@@ -4,8 +4,8 @@ target-version = ['py37']

[tool.ruff]
# Never enforce `E501` (line length violations).
ignore = ["E501", "E741", "W605"]
select = ["E", "F", "I", "W"]
ignore = ["C901", "E501", "E741", "W605"]
select = ["C", "E", "F", "I", "W"]
line-length = 119

# Ignore import violations in all `__init__.py` files.
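
Note: as I understand Ruff's rule prefixes, selecting "C" enables both flake8-comprehensions (C4xx) and mccabe complexity (C901); ignoring C901 keeps the comprehension checks while skipping the complexity limit, matching the transformers configuration this PR mirrors. A quick sketch of patterns the newly selected comprehension rules would flag, with the rule codes as my annotation rather than part of the diff:

pairs = [("a", 1), ("b", 2)]

bad_dict = dict((k, v) for k, v in pairs)  # C402: generator passed to dict()
bad_set = set(k for k, _ in pairs)         # C401: generator passed to set()
bad_kwargs = dict(x=1, y=2)                # C408: dict() call with keyword arguments
bad_copy = [p for p in pairs]              # C416: comprehension that only copies

# Behaviour-identical rewrites of the kind applied throughout this PR.
good_dict = {k: v for k, v in pairs}
good_set = {k for k, _ in pairs}
good_kwargs = {"x": 1, "y": 2}
good_copy = list(pairs)

assert (bad_dict, bad_set, bad_kwargs, bad_copy) == (good_dict, good_set, good_kwargs, good_copy)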

scripts/convert_ddpm_original_checkpoint_to_diffusers.py (2 changes: 1 addition & 1 deletion)

@@ -404,7 +404,7 @@ def convert_vq_autoenc_checkpoint(checkpoint, config):
config = json.loads(f.read())

# unet case
key_prefix_set = set(key.split(".")[0] for key in checkpoint.keys())
key_prefix_set = {key.split(".")[0] for key in checkpoint.keys()}
if "encoder" in key_prefix_set and "decoder" in key_prefix_set:
converted_checkpoint = convert_vq_autoenc_checkpoint(checkpoint, config)
else:
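
Note: the generator fed to set() becomes a set comprehension, which looks like flake8-comprehensions rule C401. A self-contained sketch with hypothetical checkpoint keys:

# Hypothetical checkpoint keys, standing in for checkpoint.keys().
checkpoint = {"encoder.conv_in.weight": 0, "decoder.conv_out.bias": 0, "quant_conv.weight": 0}

prefixes_old = set(key.split(".")[0] for key in checkpoint.keys())  # old style
prefixes_new = {key.split(".")[0] for key in checkpoint.keys()}     # new style

assert prefixes_old == prefixes_new == {"encoder", "decoder", "quant_conv"}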

scripts/convert_models_diffuser_to_diffusers.py (80 changes: 40 additions & 40 deletions)

@@ -24,29 +24,29 @@ def unet(hor):
up_block_types = ("UpResnetBlock1D", "UpResnetBlock1D", "UpResnetBlock1D")
model = torch.load(f"/Users/bglickenhaus/Documents/diffuser/temporal_unet-hopper-mediumv2-hor{hor}.torch")
state_dict = model.state_dict()
config = dict(
down_block_types=down_block_types,
block_out_channels=block_out_channels,
up_block_types=up_block_types,
layers_per_block=1,
use_timestep_embedding=True,
out_block_type="OutConv1DBlock",
norm_num_groups=8,
downsample_each_block=False,
in_channels=14,
out_channels=14,
extra_in_channels=0,
time_embedding_type="positional",
flip_sin_to_cos=False,
freq_shift=1,
sample_size=65536,
mid_block_type="MidResTemporalBlock1D",
act_fn="mish",
)
config = {
"down_block_types": down_block_types,
"block_out_channels": block_out_channels,
"up_block_types": up_block_types,
"layers_per_block": 1,
"use_timestep_embedding": True,
"out_block_type": "OutConv1DBlock",
"norm_num_groups": 8,
"downsample_each_block": False,
"in_channels": 14,
"out_channels": 14,
"extra_in_channels": 0,
"time_embedding_type": "positional",
"flip_sin_to_cos": False,
"freq_shift": 1,
"sample_size": 65536,
"mid_block_type": "MidResTemporalBlock1D",
"act_fn": "mish",
}
hf_value_function = UNet1DModel(**config)
print(f"length of state dict: {len(state_dict.keys())}")
print(f"length of value function dict: {len(hf_value_function.state_dict().keys())}")
mapping = dict((k, hfk) for k, hfk in zip(model.state_dict().keys(), hf_value_function.state_dict().keys()))
mapping = dict(zip(model.state_dict().keys(), hf_value_function.state_dict().keys()))
for k, v in mapping.items():
state_dict[v] = state_dict.pop(k)
hf_value_function.load_state_dict(state_dict)
@@ -57,33 +57,33 @@ def unet(hor):


def value_function():
config = dict(
in_channels=14,
down_block_types=("DownResnetBlock1D", "DownResnetBlock1D", "DownResnetBlock1D", "DownResnetBlock1D"),
up_block_types=(),
out_block_type="ValueFunction",
mid_block_type="ValueFunctionMidBlock1D",
block_out_channels=(32, 64, 128, 256),
layers_per_block=1,
downsample_each_block=True,
sample_size=65536,
out_channels=14,
extra_in_channels=0,
time_embedding_type="positional",
use_timestep_embedding=True,
flip_sin_to_cos=False,
freq_shift=1,
norm_num_groups=8,
act_fn="mish",
)
config = {
"in_channels": 14,
"down_block_types": ("DownResnetBlock1D", "DownResnetBlock1D", "DownResnetBlock1D", "DownResnetBlock1D"),
"up_block_types": (),
"out_block_type": "ValueFunction",
"mid_block_type": "ValueFunctionMidBlock1D",
"block_out_channels": (32, 64, 128, 256),
"layers_per_block": 1,
"downsample_each_block": True,
"sample_size": 65536,
"out_channels": 14,
"extra_in_channels": 0,
"time_embedding_type": "positional",
"use_timestep_embedding": True,
"flip_sin_to_cos": False,
"freq_shift": 1,
"norm_num_groups": 8,
"act_fn": "mish",
}

model = torch.load("/Users/bglickenhaus/Documents/diffuser/value_function-hopper-mediumv2-hor32.torch")
state_dict = model
hf_value_function = UNet1DModel(**config)
print(f"length of state dict: {len(state_dict.keys())}")
print(f"length of value function dict: {len(hf_value_function.state_dict().keys())}")

mapping = dict((k, hfk) for k, hfk in zip(state_dict.keys(), hf_value_function.state_dict().keys()))
mapping = dict(zip(state_dict.keys(), hf_value_function.state_dict().keys()))
for k, v in mapping.items():
state_dict[v] = state_dict.pop(k)
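
Note: both mapping rewrites in this file drop a redundant generator around zip; dict(zip(a, b)) builds the same key mapping directly. A compact sketch with hypothetical parameter names:

# Hypothetical original and diffusers-style parameter names.
old_keys = ["blocks.0.weight", "blocks.0.bias"]
new_keys = ["down_blocks.0.weight", "down_blocks.0.bias"]

mapping_old = dict((k, hfk) for k, hfk in zip(old_keys, new_keys))  # old style
mapping_new = dict(zip(old_keys, new_keys))                         # new style

assert mapping_old == mapping_new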


scripts/convert_original_audioldm_to_diffusers.py (68 changes: 34 additions & 34 deletions)

@@ -246,19 +246,19 @@ def create_unet_diffusers_config(original_config, image_size: int):
)
class_embeddings_concat = unet_params.extra_film_use_concat if "extra_film_use_concat" in unet_params else None

config = dict(
sample_size=image_size // vae_scale_factor,
in_channels=unet_params.in_channels,
out_channels=unet_params.out_channels,
down_block_types=tuple(down_block_types),
up_block_types=tuple(up_block_types),
block_out_channels=tuple(block_out_channels),
layers_per_block=unet_params.num_res_blocks,
cross_attention_dim=cross_attention_dim,
class_embed_type=class_embed_type,
projection_class_embeddings_input_dim=projection_class_embeddings_input_dim,
class_embeddings_concat=class_embeddings_concat,
)
config = {
"sample_size": image_size // vae_scale_factor,
"in_channels": unet_params.in_channels,
"out_channels": unet_params.out_channels,
"down_block_types": tuple(down_block_types),
"up_block_types": tuple(up_block_types),
"block_out_channels": tuple(block_out_channels),
"layers_per_block": unet_params.num_res_blocks,
"cross_attention_dim": cross_attention_dim,
"class_embed_type": class_embed_type,
"projection_class_embeddings_input_dim": projection_class_embeddings_input_dim,
"class_embeddings_concat": class_embeddings_concat,
}

return config

@@ -278,17 +278,17 @@ def create_vae_diffusers_config(original_config, checkpoint, image_size: int):

scaling_factor = checkpoint["scale_factor"] if "scale_by_std" in original_config.model.params else 0.18215

config = dict(
sample_size=image_size,
in_channels=vae_params.in_channels,
out_channels=vae_params.out_ch,
down_block_types=tuple(down_block_types),
up_block_types=tuple(up_block_types),
block_out_channels=tuple(block_out_channels),
latent_channels=vae_params.z_channels,
layers_per_block=vae_params.num_res_blocks,
scaling_factor=float(scaling_factor),
)
config = {
"sample_size": image_size,
"in_channels": vae_params.in_channels,
"out_channels": vae_params.out_ch,
"down_block_types": tuple(down_block_types),
"up_block_types": tuple(up_block_types),
"block_out_channels": tuple(block_out_channels),
"latent_channels": vae_params.z_channels,
"layers_per_block": vae_params.num_res_blocks,
"scaling_factor": float(scaling_factor),
}
return config


@@ -670,18 +670,18 @@ def create_transformers_vocoder_config(original_config):
"""
vocoder_params = original_config.model.params.vocoder_config.params

config = dict(
model_in_dim=vocoder_params.num_mels,
sampling_rate=vocoder_params.sampling_rate,
upsample_initial_channel=vocoder_params.upsample_initial_channel,
upsample_rates=list(vocoder_params.upsample_rates),
upsample_kernel_sizes=list(vocoder_params.upsample_kernel_sizes),
resblock_kernel_sizes=list(vocoder_params.resblock_kernel_sizes),
resblock_dilation_sizes=[
config = {
"model_in_dim": vocoder_params.num_mels,
"sampling_rate": vocoder_params.sampling_rate,
"upsample_initial_channel": vocoder_params.upsample_initial_channel,
"upsample_rates": list(vocoder_params.upsample_rates),
"upsample_kernel_sizes": list(vocoder_params.upsample_kernel_sizes),
"resblock_kernel_sizes": list(vocoder_params.resblock_kernel_sizes),
"resblock_dilation_sizes": [
list(resblock_dilation) for resblock_dilation in vocoder_params.resblock_dilation_sizes
],
normalize_before=False,
)
"normalize_before": False,
}

return config
