huggingface
diff --git a/examples/dreambooth/train_dreambooth_flax.py b/examples/dreambooth/train_dreambooth_flax.py
Lines changed: 2 additions & 1 deletion
diff --git a/examples/text_to_image/train_text_to_image_flax.py b/examples/text_to_image/train_text_to_image_flax.py
Lines changed: 2 additions & 1 deletion
diff --git a/examples/textual_inversion/textual_inversion_flax.py b/examples/textual_inversion/textual_inversion_flax.py
Lines changed: 2 additions & 1 deletion
diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion.py b/src/diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion.py
Lines changed: 2 additions & 1 deletion
diff --git a/src/diffusers/schedulers/scheduling_common_flax.py b/src/diffusers/schedulers/scheduling_common_flax.py
Lines changed: 106 additions & 0 deletions
@@ -477,6 +477,7 @@ def collate_fn(examples):
     noise_scheduler = FlaxDDPMScheduler(
         beta_start=0.00085, beta_end=0.012, beta_schedule="scaled_linear", num_train_timesteps=1000
     )
+    noise_scheduler_state = noise_scheduler.create_state()
 
     # Initialize our training
     train_rngs = jax.random.split(rng, jax.local_device_count())
@@ -513,7 +514,7 @@ def compute_loss(params):
 
             # Add noise to the latents according to the noise magnitude at each timestep
             # (this is the forward diffusion process)
-            noisy_latents = noise_scheduler.add_noise(latents, noise, timesteps)
+            noisy_latents = noise_scheduler.add_noise(noise_scheduler_state, latents, noise, timesteps)
 
             # Get the text embedding for conditioning
             if args.train_text_encoder:
 
@@ -417,6 +417,7 @@ def collate_fn(examples):
     noise_scheduler = FlaxDDPMScheduler(
         beta_start=0.00085, beta_end=0.012, beta_schedule="scaled_linear", num_train_timesteps=1000
     )
+    noise_scheduler_state = noise_scheduler.create_state()
 
     # Initialize our training
     rng = jax.random.PRNGKey(args.seed)
@@ -449,7 +450,7 @@ def compute_loss(params):
 
             # Add noise to the latents according to the noise magnitude at each timestep
             # (this is the forward diffusion process)
-            noisy_latents = noise_scheduler.add_noise(latents, noise, timesteps)
+            noisy_latents = noise_scheduler.add_noise(noise_scheduler_state, latents, noise, timesteps)
 
             # Get the text embedding for conditioning
             encoder_hidden_states = text_encoder(
 
@@ -505,6 +505,7 @@ def update_fn(updates, state, params=None):
     noise_scheduler = FlaxDDPMScheduler(
         beta_start=0.00085, beta_end=0.012, beta_schedule="scaled_linear", num_train_timesteps=1000
     )
+    noise_scheduler_state = noise_scheduler.create_state()
 
     # Initialize our training
     train_rngs = jax.random.split(rng, jax.local_device_count())
@@ -531,7 +532,7 @@ def compute_loss(params):
                 0,
                 noise_scheduler.config.num_train_timesteps,
             )
-            noisy_latents = noise_scheduler.add_noise(latents, noise, timesteps)
+            noisy_latents = noise_scheduler.add_noise(noise_scheduler_state, latents, noise, timesteps)
             encoder_hidden_states = state.apply_fn(
                 batch["input_ids"], params=params, dropout_rng=dropout_rng, train=True
             )[0]
 
@@ -261,7 +261,8 @@ def loop_body(step, args):
         )
 
         # scale the initial noise by the standard deviation required by the scheduler
-        latents = latents * self.scheduler.init_noise_sigma
+        latents = latents * params["scheduler"].init_noise_sigma
+
         if DEBUG:
             # run with python for loop
             for i in range(num_inference_steps):
 
@@ -0,0 +1,106 @@
+# Copyright 2022 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import math
+
+import flax
+import jax.numpy as jnp
+
+from .scheduling_utils_flax import broadcast_to_shape_from_left
+
+
+def betas_for_alpha_bar(num_diffusion_timesteps: int, max_beta=0.999, dtype=jnp.float32) -> jnp.ndarray:
+    """
+    Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of
+    (1-beta) over time from t = [0,1].
+
+    Contains a function alpha_bar that takes an argument t and transforms it to the cumulative product of (1-beta) up
+    to that part of the diffusion process.
+
+    Args:
+        num_diffusion_timesteps (`int`): the number of betas to produce.
+        max_beta (`float`): the maximum beta to use; use values lower than 1 to
+                     prevent singularities.
+        dtype (`jnp.dtype`): the dtype of the returned array of betas; defaults to `jnp.float32`.
+
+    Returns:
+        betas (`jnp.ndarray`): the betas used by the scheduler to step the model outputs
+    """
+
+    def alpha_bar(time_step):
+        return math.cos((time_step + 0.008) / 1.008 * math.pi / 2) ** 2
+
+    betas = []
+    for i in range(num_diffusion_timesteps):
+        t1 = i / num_diffusion_timesteps
+        t2 = (i + 1) / num_diffusion_timesteps
+        betas.append(min(1 - alpha_bar(t2) / alpha_bar(t1), max_beta))
+    return jnp.array(betas, dtype=dtype)
+
+
+@flax.struct.dataclass
+class SchedulerCommonState:
+    alphas: jnp.ndarray
+    betas: jnp.ndarray
+    alphas_cumprod: jnp.ndarray
+
+
+def create_common_state(scheduler):
+    config = scheduler.config
+
+    if config.trained_betas is not None:
+        betas = jnp.asarray(config.trained_betas, dtype=scheduler.dtype)
+    elif config.beta_schedule == "linear":
+        betas = jnp.linspace(config.beta_start, config.beta_end, config.num_train_timesteps, dtype=scheduler.dtype)
+    elif config.beta_schedule == "scaled_linear":
+        # this schedule is very specific to the latent diffusion model.
+        betas = (
+            jnp.linspace(
+                config.beta_start**0.5, config.beta_end**0.5, config.num_train_timesteps, dtype=scheduler.dtype
+            )
+            ** 2
+        )
+    elif config.beta_schedule == "squaredcos_cap_v2":
+        # Glide cosine schedule
+        betas = betas_for_alpha_bar(config.num_train_timesteps, dtype=scheduler.dtype)
+    else:
+        raise NotImplementedError(
+            f"beta_schedule {config.beta_schedule} is not implemented for scheduler {scheduler.__class__.__name__}"
+        )
+
+    alphas = 1.0 - betas
+
+    alphas_cumprod = jnp.cumprod(alphas, axis=0)
+
+    return SchedulerCommonState(
+        alphas=alphas,
+        betas=betas,
+        alphas_cumprod=alphas_cumprod,
+    )
+
+
+def add_noise_common(
+    state: SchedulerCommonState, original_samples: jnp.ndarray, noise: jnp.ndarray, timesteps: jnp.ndarray
+):
+    alphas_cumprod = state.alphas_cumprod
+
+    sqrt_alpha_prod = alphas_cumprod[timesteps] ** 0.5
+    sqrt_alpha_prod = sqrt_alpha_prod.flatten()
+    sqrt_alpha_prod = broadcast_to_shape_from_left(sqrt_alpha_prod, original_samples.shape)
+
+    sqrt_one_minus_alpha_prod = (1 - alphas_cumprod[timesteps]) ** 0.5
+    sqrt_one_minus_alpha_prod = sqrt_one_minus_alpha_prod.flatten()
+    sqrt_one_minus_alpha_prod = broadcast_to_shape_from_left(sqrt_one_minus_alpha_prod, original_samples.shape)
+
+    noisy_samples = sqrt_alpha_prod * original_samples + sqrt_one_minus_alpha_prod * noise
+    return noisy_samples
Original file line number	Diff line number	Diff line change
`@@ -261,7 +261,8 @@ def loop_body(step, args):`
`261`	`261`	`)`
`262`	`262`
`263`	`263`	`# scale the initial noise by the standard deviation required by the scheduler`
`264`		`- latents = latents * self.scheduler.init_noise_sigma`
	`264`	`+ latents = latents * params["scheduler"].init_noise_sigma`
	`265`	`+`
`265`	`266`	`if DEBUG:`
`266`	`267`	`# run with python for loop`
`267`	`268`	`for i in range(num_inference_steps):`