2 changes: 1 addition & 1 deletion setup.py
@@ -95,7 +95,7 @@
 # 2. once modified, run: `make deps_table_update` to update src/diffusers/dependency_versions_table.py
 _deps = [
     "Pillow",  # keep the PIL.Image.Resampling deprecation away
-    "accelerate>=0.11.0",
+    "accelerate>=0.29.3",
Review comment (Member): Works for me!
"compel==0.1.8",
"datasets",
"filelock",
2 changes: 1 addition & 1 deletion src/diffusers/dependency_versions_table.py
@@ -3,7 +3,7 @@
 # 2. run `make deps_table_update`
 deps = {
     "Pillow": "Pillow",
-    "accelerate": "accelerate>=0.11.0",
+    "accelerate": "accelerate>=0.29.3",
     "compel": "compel==0.1.8",
     "datasets": "datasets",
     "filelock": "filelock",
1 change: 1 addition & 0 deletions src/diffusers/models/modeling_utils.py
@@ -700,6 +700,7 @@ def from_pretrained(cls, pretrained_model_name_or_path: Optional[Union[str, os.P
                 offload_state_dict=offload_state_dict,
                 dtype=torch_dtype,
                 force_hooks=True,
+                strict=True,
             )
         except AttributeError as e:
             # When using accelerate loading, we do not have the ability to load the state
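For context, here is a minimal sketch of what `strict=True` changes when diffusers hands loading off to accelerate. This is illustrative rather than the diffusers call site itself; the model class and checkpoint path are stand-ins. With `strict=True`, `load_checkpoint_and_dispatch` raises on missing or unexpected state-dict keys instead of silently loading a partial checkpoint, which presumably motivates the `accelerate>=0.29.3` floor above.

```python
# Illustrative sketch only (not the diffusers source). The model class and
# checkpoint path below are placeholder assumptions.
from accelerate import init_empty_weights, load_checkpoint_and_dispatch
from diffusers import UNet2DConditionModel

with init_empty_weights():
    model = UNet2DConditionModel()  # build the module structure without allocating weights

model = load_checkpoint_and_dispatch(
    model,
    checkpoint="/path/to/diffusion_pytorch_model.safetensors",  # hypothetical path
    device_map="auto",
    force_hooks=True,  # always attach dispatch hooks, mirroring the diffusers call site
    strict=True,       # fail loudly on missing or unexpected keys
)
```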
18 changes: 10 additions & 8 deletions src/diffusers/pipelines/pipeline_loading_utils.py
@@ -571,15 +571,17 @@ def _get_final_device_map(device_map, pipeline_class, passed_class_obj, init_dic

     # Obtain a dictionary mapping the model-level components to the available
     # devices based on the maximum memory and the model sizes.
-    device_id_component_mapping = _assign_components_to_devices(
-        module_sizes, max_memory, device_mapping_strategy=device_map
-    )
+    final_device_map = None
+    if len(max_memory) > 0:
+        device_id_component_mapping = _assign_components_to_devices(
+            module_sizes, max_memory, device_mapping_strategy=device_map
+        )

-    # Obtain the final device map, e.g., `{"unet": 0, "text_encoder": 1, "vae": 1, ...}`
-    final_device_map = {}
-    for device_id, components in device_id_component_mapping.items():
-        for component in components:
-            final_device_map[component] = device_id
+        # Obtain the final device map, e.g., `{"unet": 0, "text_encoder": 1, "vae": 1, ...}`
+        final_device_map = {}
+        for device_id, components in device_id_component_mapping.items():
+            for component in components:
+                final_device_map[component] = device_id

     return final_device_map
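In effect, `_get_final_device_map` now returns `None` when no accelerator memory is available to distribute components over, instead of asking `_assign_components_to_devices` to split modules across an empty device set. A minimal sketch of the guarded control flow, with a hypothetical stand-in for the assignment helper:

```python
# Hypothetical, simplified stand-in for the guarded logic above;
# not the diffusers source.
def final_device_map_sketch(module_sizes: dict, max_memory: dict):
    final_device_map = None
    if len(max_memory) > 0:  # only map components when at least one device reported memory
        # pretend every component fits on device 0 (stand-in for _assign_components_to_devices)
        device_id_component_mapping = {0: list(module_sizes)}
        final_device_map = {
            component: device_id
            for device_id, components in device_id_component_mapping.items()
            for component in components
        }
    return final_device_map


# No visible devices -> None rather than an exception.
assert final_device_map_sketch({"unet": 1.0, "vae": 0.2}, {}) is None
# One device with memory -> every component lands on device 0.
assert final_device_map_sketch({"unet": 1.0, "vae": 0.2}, {0: 2**30}) == {"unet": 0, "vae": 0}
```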

80 changes: 41 additions & 39 deletions tests/models/test_attention_processor.py
@@ -1,7 +1,10 @@
 import tempfile
 import unittest

+import numpy as np
 import torch

+from diffusers import DiffusionPipeline
 from diffusers.models.attention_processor import Attention, AttnAddedKVProcessor

@@ -77,42 +80,41 @@ def test_only_cross_attention(self):

 class DeprecatedAttentionBlockTests(unittest.TestCase):
     def test_conversion_when_using_device_map(self):
-        # To-DO for Sayak: enable this test again and to test `device_map='balanced'` once we have this in accelerate https://github.com/huggingface/accelerate/pull/2641
-        pass
-        # pipe = DiffusionPipeline.from_pretrained("hf-internal-testing/tiny-stable-diffusion-pipe", safety_checker=None)
-
-        # pre_conversion = pipe(
-        #     "foo",
-        #     num_inference_steps=2,
-        #     generator=torch.Generator("cpu").manual_seed(0),
-        #     output_type="np",
-        # ).images
-
-        # # the initial conversion succeeds
-        # pipe = DiffusionPipeline.from_pretrained(
-        #     "hf-internal-testing/tiny-stable-diffusion-pipe", device_map="sequential", safety_checker=None
-        # )
-
-        # conversion = pipe(
-        #     "foo",
-        #     num_inference_steps=2,
-        #     generator=torch.Generator("cpu").manual_seed(0),
-        #     output_type="np",
-        # ).images
-
-        # with tempfile.TemporaryDirectory() as tmpdir:
-        #     # save the converted model
-        #     pipe.save_pretrained(tmpdir)
-
-        #     # can also load the converted weights
-        #     pipe = DiffusionPipeline.from_pretrained(tmpdir, device_map="sequential", safety_checker=None)
-
-        # after_conversion = pipe(
-        #     "foo",
-        #     num_inference_steps=2,
-        #     generator=torch.Generator("cpu").manual_seed(0),
-        #     output_type="np",
-        # ).images
-
-        # self.assertTrue(np.allclose(pre_conversion, conversion, atol=1e-5))
-        # self.assertTrue(np.allclose(conversion, after_conversion, atol=1e-5))
+        pipe = DiffusionPipeline.from_pretrained(
+            "hf-internal-testing/tiny-stable-diffusion-torch", safety_checker=None
+        )
+
+        pre_conversion = pipe(
+            "foo",
+            num_inference_steps=2,
+            generator=torch.Generator("cpu").manual_seed(0),
+            output_type="np",
+        ).images
+
+        # the initial conversion succeeds
+        pipe = DiffusionPipeline.from_pretrained(
+            "hf-internal-testing/tiny-stable-diffusion-torch", device_map="balanced", safety_checker=None
+        )
+
+        conversion = pipe(
+            "foo",
+            num_inference_steps=2,
+            generator=torch.Generator("cpu").manual_seed(0),
+            output_type="np",
+        ).images
+
+        with tempfile.TemporaryDirectory() as tmpdir:
+            # save the converted model
+            pipe.save_pretrained(tmpdir)
+
+            # can also load the converted weights
+            pipe = DiffusionPipeline.from_pretrained(tmpdir, device_map="balanced", safety_checker=None)
+            after_conversion = pipe(
+                "foo",
+                num_inference_steps=2,
+                generator=torch.Generator("cpu").manual_seed(0),
+                output_type="np",
+            ).images
+
+        self.assertTrue(np.allclose(pre_conversion, conversion, atol=1e-3))
+        self.assertTrue(np.allclose(conversion, after_conversion, atol=1e-3))
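Taken together, the changes let a pipeline be loaded with the `balanced` device-mapping strategy on accelerate>=0.29.3. A hedged usage sketch follows; the `hf_device_map` attribute name and the example mapping shown in the comment are assumptions, not confirmed by this diff:

```python
# Hedged usage sketch: load a pipeline with the balanced device-map strategy.
import torch
from diffusers import DiffusionPipeline

pipe = DiffusionPipeline.from_pretrained(
    "hf-internal-testing/tiny-stable-diffusion-torch",
    device_map="balanced",  # spread components evenly across visible devices
    safety_checker=None,
)

# Assumed attribute; expected to resemble {"unet": 0, "text_encoder": 1, "vae": 1}.
print(getattr(pipe, "hf_device_map", None))

images = pipe(
    "a photo of an astronaut",
    num_inference_steps=2,
    generator=torch.Generator("cpu").manual_seed(0),
    output_type="np",
).images
```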