diff --git a/.github/workflows/pr_tests.yml b/.github/workflows/pr_tests.yml
index cf21edf99165..81c75fecec6e 100644
--- a/.github/workflows/pr_tests.yml
+++ b/.github/workflows/pr_tests.yml
@@ -34,6 +34,7 @@ jobs:
         python -m pip install --upgrade pip
         python -m pip install torch --extra-index-url https://download.pytorch.org/whl/cpu
         python -m pip install -e .[quality,test]
+        python -m pip install git+https://github.com/huggingface/accelerate
 
     - name: Environment
       run: |
@@ -80,6 +81,7 @@ jobs:
         ${CONDA_RUN} python -m pip install --upgrade pip
         ${CONDA_RUN} python -m pip install -e .[quality,test]
         ${CONDA_RUN} python -m pip install --pre torch==${MPS_TORCH_VERSION} --extra-index-url https://download.pytorch.org/whl/test/cpu
+        ${CONDA_RUN} python -m pip install git+https://github.com/huggingface/accelerate
 
     - name: Environment
       shell: arch -arch arm64 bash {0}
diff --git a/.github/workflows/push_tests.yml b/.github/workflows/push_tests.yml
index 3e4a81c91c01..dfd83aa9af46 100644
--- a/.github/workflows/push_tests.yml
+++ b/.github/workflows/push_tests.yml
@@ -36,6 +36,7 @@ jobs:
         python -m pip uninstall -y torch torchvision torchtext
         python -m pip install torch --extra-index-url https://download.pytorch.org/whl/cu116
         python -m pip install -e .[quality,test]
+        python -m pip install git+https://github.com/huggingface/accelerate
 
     - name: Environment
       run: |
@@ -58,8 +59,6 @@ jobs:
         name: torch_test_reports
         path: reports
 
-
-
   run_examples_single_gpu:
     name: Examples tests
     runs-on: [ self-hosted, docker-gpu, single-gpu ]
@@ -83,6 +82,7 @@ jobs:
         python -m pip uninstall -y torch torchvision torchtext
         python -m pip install torch --extra-index-url https://download.pytorch.org/whl/cu116
         python -m pip install -e .[quality,test,training]
+        python -m pip install git+https://github.com/huggingface/accelerate
 
     - name: Environment
       run: |
diff --git a/src/diffusers/pipeline_utils.py b/src/diffusers/pipeline_utils.py
index 894505654e47..eb0a634ccc30 100644
--- a/src/diffusers/pipeline_utils.py
+++ b/src/diffusers/pipeline_utils.py
@@ -209,6 +209,9 @@ def device(self) -> torch.device:
         for name in module_names.keys():
             module = getattr(self, name)
             if isinstance(module, torch.nn.Module):
+                # NOTE(review): presumably modules offloaded via accelerate report the "meta" device; confirm.
+                if module.device == torch.device("meta"):
+                    return torch.device("cpu")
                 return module.device
         return torch.device("cpu")
 
diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py
index 02a6b45fdefc..cf4c5c5fdeca 100644
--- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py
+++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py
@@ -3,6 +3,7 @@
 
 import torch
 
+from diffusers.utils import is_accelerate_available
 from transformers import CLIPFeatureExtractor, CLIPTextModel, CLIPTokenizer
 
 from ...configuration_utils import FrozenDict
@@ -118,6 +119,29 @@ def disable_attention_slicing(self):
         # set slice_size = `None` to disable `attention slicing`
         self.enable_attention_slicing(None)
 
+    def cuda_with_minimal_gpu_usage(self):
+        """
+        Offload the UNet, text encoder, VAE and safety checker with `accelerate.cpu_offload`, using CUDA as the
+        execution device, and enable attention slicing with a slice size of 1.
+
+        Components that are `None` are skipped.
+
+        Raises:
+            ImportError: If `accelerate` is not installed.
+        """
+        if is_accelerate_available():
+            from accelerate import cpu_offload
+        else:
+            raise ImportError("Please install accelerate via `pip install accelerate`")
+
+        device = torch.device("cuda")
+        self.enable_attention_slicing(1)
+
+        for cpu_offloaded_model in [self.unet, self.text_encoder, self.vae, self.safety_checker]:
+            # A component may be `None`; `cpu_offload` needs an actual module to hook into.
+            if cpu_offloaded_model is not None:
+                cpu_offload(cpu_offloaded_model, device)
+
     @torch.no_grad()
     def __call__(
         self,
diff --git a/tests/test_pipelines.py b/tests/test_pipelines.py
index a4686366c8e7..6e9388ca3a65 100644
--- a/tests/test_pipelines.py
+++ b/tests/test_pipelines.py
@@ -535,3 +535,24 @@ def test_stable_diffusion_accelerate_load_reduces_memory_footprint(self):
         tracemalloc.stop()
 
         assert peak_accelerate < peak_normal
+
+    @slow
+    @unittest.skipIf(torch_device == "cpu", "This test is supposed to run on GPU")
+    def test_stable_diffusion_pipeline_with_unet_on_gpu_only(self):
+        # reset the CUDA peak-memory counters so the measurement below starts from zero
+        torch.cuda.empty_cache()
+        torch.cuda.reset_peak_memory_stats()
+
+        pipeline_id = "CompVis/stable-diffusion-v1-4"
+        prompt = "Andromeda galaxy in a bottle"
+
+        pipeline = StableDiffusionPipeline.from_pretrained(
+            pipeline_id, revision="fp16", torch_dtype=torch.float32, use_auth_token=True
+        )
+        pipeline.cuda_with_minimal_gpu_usage()
+
+        _ = pipeline(prompt)
+
+        mem_bytes = torch.cuda.max_memory_allocated()
+        # make sure that less than 0.8 GB is allocated
+        assert mem_bytes < 0.8 * 10**9