huggingface · yiyixuxu · Nov 10, 2023 · Nov 10, 2023 · Nov 10, 2023 · patrickvonplaten
diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py
@@ -551,6 +551,7 @@ def get_timesteps(self, num_inference_steps, strength, device):
 
         t_start = max(num_inference_steps - init_timestep, 0)
         timesteps = self.scheduler.timesteps[t_start * self.scheduler.order :]
+        self.scheduler._step_index_init = t_start * self.scheduler.order
 
         return timesteps, num_inference_steps - t_start
 

diff --git a/src/diffusers/schedulers/scheduling_dpmsolver_multistep.py b/src/diffusers/schedulers/scheduling_dpmsolver_multistep.py
@@ -215,13 +215,21 @@ def __init__(
         self.model_outputs = [None] * solver_order
         self.lower_order_nums = 0
         self._step_index = None
+        self._step_index_init = None
 
     @property
     def step_index(self):
         """
         The index counter for current timestep. It will increae 1 after each scheduler step.
         """
         return self._step_index
+
+    @property
+    def step_index_init(self):
+        """
+        The index of the first timestep of the denoising loop; `None` until it has been set.
+        """
+        return self._step_index_init
 
     def set_timesteps(self, num_inference_steps: int = None, device: Union[str, torch.device] = None):
         """
@@ -760,23 +768,28 @@ def multistep_dpm_solver_third_order_update(
         return x_t
 
     def _init_step_index(self, timestep):
-        if isinstance(timestep, torch.Tensor):
-            timestep = timestep.to(self.timesteps.device)
-
-        index_candidates = (self.timesteps == timestep).nonzero()
-
-        if len(index_candidates) == 0:
-            step_index = len(self.timesteps) - 1
-        # The sigma index that is taken for the **very** first `step`
-        # is always the second index (or the last index if there is only 1)
-        # This way we can ensure we don't accidentally skip a sigma in
-        # case we start in the middle of the denoising schedule (e.g. for image-to-image)
-        elif len(index_candidates) > 1:
-            step_index = index_candidates[1].item()
-        else:
-            step_index = index_candidates[0].item()
+
+        if self.step_index_init is None:
+            if isinstance(timestep, torch.Tensor):
+                timestep = timestep.to(self.timesteps.device)
+
+            index_candidates = (self.timesteps == timestep).nonzero()
+
+            if len(index_candidates) == 0:
+                step_index = len(self.timesteps) - 1
+            # When the timestep appears more than once in the schedule, the index
+            # used for the **very** first `step` is the second match; with a single
+            # match it is that match. This way we don't accidentally skip a sigma in
+            # case we start in the middle of the denoising schedule (e.g. for image-to-image)
+            elif len(index_candidates) > 1:
+                step_index = index_candidates[1].item()
+            else:
+                step_index = index_candidates[0].item()
 
-        self._step_index = step_index
+            self._step_index_init = step_index
+            self._step_index = step_index
+        else:
+            self._step_index = self.step_index_init
 
     def step(
         self,
@@ -884,8 +897,10 @@ def add_noise(
         else:
             schedule_timesteps = self.timesteps.to(original_samples.device)
             timesteps = timesteps.to(original_samples.device)
-
-        step_indices = [(schedule_timesteps == t).nonzero().item() for t in timesteps]
+        if self.step_index_init is None:
+            step_indices = [(schedule_timesteps == t).nonzero().item() for t in timesteps]
+        else:
+            step_indices = [self.step_index_init] * timesteps.shape[0]
 
         sigma = sigmas[step_indices].flatten()
         while len(sigma.shape) < len(original_samples.shape):