diff --git a/Assets/Templates/LCM-Dreamshaper-V7/LCM-Dreamshaper-V7-ONNX.json b/Assets/Templates/LCM-Dreamshaper-V7/LCM-Dreamshaper-V7-ONNX.json
index c71e7807..0064d6f5 100644
--- a/Assets/Templates/LCM-Dreamshaper-V7/LCM-Dreamshaper-V7-ONNX.json
+++ b/Assets/Templates/LCM-Dreamshaper-V7/LCM-Dreamshaper-V7-ONNX.json
@@ -13,7 +13,8 @@
   "PipelineType": "LatentConsistency",
   "Diffusers": [
     "TextToImage",
-    "ImageToImage"
+    "ImageToImage",
+    "ImageInpaintLegacy"
   ],
   "ModelFiles": [
     "https://huggingface.co/TheyCallMeHex/LCM-Dreamshaper-V7-ONNX/resolve/main/tokenizer/model.onnx",
diff --git a/OnnxStack.StableDiffusion/Diffusers/LatentConsistency/InpaintLegacyDiffuser.cs b/OnnxStack.StableDiffusion/Diffusers/LatentConsistency/InpaintLegacyDiffuser.cs
new file mode 100644
index 00000000..a8b678b1
--- /dev/null
+++ b/OnnxStack.StableDiffusion/Diffusers/LatentConsistency/InpaintLegacyDiffuser.cs
@@ -0,0 +1,256 @@
+using Microsoft.Extensions.Logging;
+using Microsoft.ML.OnnxRuntime.Tensors;
+using OnnxStack.Core;
+using OnnxStack.Core.Config;
+using OnnxStack.Core.Model;
+using OnnxStack.Core.Services;
+using OnnxStack.StableDiffusion.Common;
+using OnnxStack.StableDiffusion.Config;
+using OnnxStack.StableDiffusion.Enums;
+using OnnxStack.StableDiffusion.Helpers;
+using SixLabors.ImageSharp;
+using SixLabors.ImageSharp.Processing;
+using System;
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.Linq;
+using System.Threading;
+using System.Threading.Tasks;
+
+namespace OnnxStack.StableDiffusion.Diffusers.LatentConsistency
+{
+    public sealed class InpaintLegacyDiffuser : LatentConsistencyDiffuser
+    {
+        /// <summary>
+        /// Initializes a new instance of the <see cref="InpaintLegacyDiffuser"/> class.
+        /// </summary>
+        /// <param name="configuration">The configuration.</param>
+        /// <param name="onnxModelService">The onnx model service.</param>
+        public InpaintLegacyDiffuser(IOnnxModelService onnxModelService, IPromptService promptService, ILogger<LatentConsistencyDiffuser> logger)
+            : base(onnxModelService, promptService, logger)
+        {
+        }
+
+
+        /// <summary>
+        /// Gets the type of the diffuser.
+        /// </summary>
+        public override DiffuserType DiffuserType => DiffuserType.ImageInpaintLegacy;
+
+
+        /// <summary>
+        /// Gets the timesteps.
+        /// </summary>
+        /// <param name="prompt">The prompt.</param>
+        /// <param name="options">The options.</param>
+        /// <param name="scheduler">The scheduler.</param>
+        /// <returns></returns>
+        protected override IReadOnlyList<int> GetTimesteps(SchedulerOptions options, IScheduler scheduler)
+        {
+            // Image2Image we narrow step the range by the Strength
+            var inittimestep = Math.Min((int)(options.InferenceSteps * options.Strength), options.InferenceSteps);
+            var start = Math.Max(options.InferenceSteps - inittimestep, 0);
+            return scheduler.Timesteps.Skip(start).ToList();
+        }
+
+
+        /// <summary>
+        /// Runs the scheduler steps.
+        /// </summary>
+        /// <param name="modelOptions">The model options.</param>
+        /// <param name="promptOptions">The prompt options.</param>
+        /// <param name="schedulerOptions">The scheduler options.</param>
+        /// <param name="promptEmbeddings">The prompt embeddings.</param>
+        /// <param name="performGuidance">if set to <c>true</c> [perform guidance].</param>
+        /// <param name="progressCallback">The progress callback.</param>
+        /// <param name="cancellationToken">The cancellation token.</param>
+        /// <returns></returns>
+        protected override async Task<DenseTensor<float>> SchedulerStepAsync(IModelOptions modelOptions, PromptOptions promptOptions, SchedulerOptions schedulerOptions, DenseTensor<float> promptEmbeddings, bool performGuidance, Action<int, int> progressCallback = null, CancellationToken cancellationToken = default)
+        {
+            using (var scheduler = GetScheduler(schedulerOptions))
+            {
+                // Get timesteps
+                var timesteps = GetTimesteps(schedulerOptions, scheduler);
+
+                // Create latent sample
+                var latentsOriginal = await PrepareLatentsAsync(modelOptions, promptOptions, schedulerOptions, scheduler, timesteps);
+
+                // Create masks sample
+                var maskImage = PrepareMask(modelOptions, promptOptions, schedulerOptions);
+
+                // Generate some noise
+                var noise = scheduler.CreateRandomSample(latentsOriginal.Dimensions);
+
+                // Add noise to original latent
+                var latents = scheduler.AddNoise(latentsOriginal, noise, timesteps);
+
+                // Get Model metadata
+                var metadata = _onnxModelService.GetModelMetadata(modelOptions, OnnxModelType.Unet);
+
+                // Get Guidance Scale Embedding
+                var guidanceEmbeddings = GetGuidanceScaleEmbedding(schedulerOptions.GuidanceScale);
+
+                // Denoised result
+                DenseTensor<float> denoised = null;
+
+                // Loop though the timesteps
+                var step = 0;
+                foreach (var timestep in timesteps)
+                {
+                    step++;
+                    var stepTime = Stopwatch.GetTimestamp();
+                    cancellationToken.ThrowIfCancellationRequested();
+
+                    // Create input tensor.
+                    var inputTensor = scheduler.ScaleInput(latents, timestep);
+                    var timestepTensor = CreateTimestepTensor(timestep);
+
+                    var outputChannels = 1;
+                    var outputDimension = schedulerOptions.GetScaledDimension(outputChannels);
+                    using (var inferenceParameters = new OnnxInferenceParameters(metadata))
+                    {
+                        inferenceParameters.AddInputTensor(inputTensor);
+                        inferenceParameters.AddInputTensor(timestepTensor);
+                        inferenceParameters.AddInputTensor(promptEmbeddings);
+                        inferenceParameters.AddInputTensor(guidanceEmbeddings);
+                        inferenceParameters.AddOutputBuffer(outputDimension);
+
+                        var results = await _onnxModelService.RunInferenceAsync(modelOptions, OnnxModelType.Unet, inferenceParameters);
+                        using (var result = results.First())
+                        {
+                            var noisePred = result.ToDenseTensor();
+
+                            // Scheduler Step
+                            var schedulerResult = scheduler.Step(noisePred, timestep, latents);
+
+                            latents = schedulerResult.Result;
+                            denoised = schedulerResult.SampleData;
+
+                            // Add noise to original latent
+                            if (step < timesteps.Count - 1)
+                            {
+                                var noiseTimestep = timesteps[step + 1];
+                                var initLatentsProper = scheduler.AddNoise(latentsOriginal, noise, new[] { noiseTimestep });
+
+                                // Apply mask and combine
+                                latents = ApplyMaskedLatents(schedulerResult.Result, initLatentsProper, maskImage);
+                            }
+                        }
+                    }
+
+                    progressCallback?.Invoke(step, timesteps.Count);
+                    _logger?.LogEnd($"Step {step}/{timesteps.Count}", stepTime);
+                }
+
+                // Decode Latents
+                return await DecodeLatentsAsync(modelOptions, promptOptions, schedulerOptions, denoised);
+            }
+        }
+
+
+        /// <summary>
+        /// Prepares the input latents for inference.
+        /// </summary>
+        /// <param name="model">The model.</param>
+        /// <param name="prompt">The prompt.</param>
+        /// <param name="options">The options.</param>
+        /// <param name="scheduler">The scheduler.</param>
+        /// <param name="timesteps">The timesteps.</param>
+        /// <returns></returns>
+        protected override async Task<DenseTensor<float>> PrepareLatentsAsync(IModelOptions model, PromptOptions prompt, SchedulerOptions options, IScheduler scheduler, IReadOnlyList<int> timesteps)
+        {
+            // Image input, decode, add noise, return as latent 0
+            var imageTensor = prompt.InputImage.ToDenseTensor(new[] { 1, 3, options.Height, options.Width });
+
+            //TODO: Model Config, Channels
+            var outputDimensions = options.GetScaledDimension();
+            var metadata = _onnxModelService.GetModelMetadata(model, OnnxModelType.VaeEncoder);
+            using (var inferenceParameters = new OnnxInferenceParameters(metadata))
+            {
+                inferenceParameters.AddInputTensor(imageTensor);
+                inferenceParameters.AddOutputBuffer(outputDimensions);
+
+                var results = await _onnxModelService.RunInferenceAsync(model, OnnxModelType.VaeEncoder, inferenceParameters);
+                using (var result = results.First())
+                {
+                    var outputResult = result.ToDenseTensor();
+                    var scaledSample = outputResult
+                        .Add(scheduler.CreateRandomSample(outputDimensions, options.InitialNoiseLevel))
+                        .MultiplyBy(model.ScaleFactor);
+
+                    return scaledSample;
+                }
+            }
+        }
+
+
+        /// <summary>
+        /// Prepares the mask.
+        /// </summary>
+        /// <param name="promptOptions">The prompt options.</param>
+        /// <param name="schedulerOptions">The scheduler options.</param>
+        /// <returns></returns>
+        private DenseTensor<float> PrepareMask(IModelOptions modelOptions, PromptOptions promptOptions, SchedulerOptions schedulerOptions)
+        {
+            using (var mask = promptOptions.InputImageMask.ToImage())
+            {
+                // Prepare the mask
+                int width = schedulerOptions.GetScaledWidth();
+                int height = schedulerOptions.GetScaledHeight();
+                mask.Mutate(x => x.Grayscale());
+                mask.Mutate(x => x.Resize(new Size(width, height), KnownResamplers.NearestNeighbor, true));
+                var maskTensor = new DenseTensor<float>(new[] { 1, 4, width, height });
+                mask.ProcessPixelRows(img =>
+                {
+                    for (int x = 0; x < width; x++)
+                    {
+                        for (int y = 0; y < height; y++)
+                        {
+                            var pixelSpan = img.GetRowSpan(y);
+                            var value = pixelSpan[x].A / 255.0f;
+                            maskTensor[0, 0, y, x] = 1f - value;
+                            maskTensor[0, 1, y, x] = 0f; // Needed for shape only
+                            maskTensor[0, 2, y, x] = 0f; // Needed for shape only
+                            maskTensor[0, 3, y, x] = 0f; // Needed for shape only
+                        }
+                    }
+                });
+
+                return maskTensor;
+            }
+        }
+
+
+        /// <summary>
+        /// Applies the masked latents.
+        /// </summary>
+        /// <param name="latents">The latents.</param>
+        /// <param name="initLatentsProper">The initialize latents proper.</param>
+        /// <param name="mask">The mask.</param>
+        /// <returns></returns>
+        private DenseTensor<float> ApplyMaskedLatents(DenseTensor<float> latents, DenseTensor<float> initLatentsProper, DenseTensor<float> mask)
+        {
+            var result = new DenseTensor<float>(latents.Dimensions);
+            for (int batch = 0; batch < latents.Dimensions[0]; batch++)
+            {
+                for (int channel = 0; channel < latents.Dimensions[1]; channel++)
+                {
+                    for (int height = 0; height < latents.Dimensions[2]; height++)
+                    {
+                        for (int width = 0; width < latents.Dimensions[3]; width++)
+                        {
+                            float maskValue = mask[batch, 0, height, width];
+                            float latentsValue = latents[batch, channel, height, width];
+                            float initLatentsProperValue = initLatentsProper[batch, channel, height, width];
+
+                            //Apply the logic to compute the result based on the mask
+                            float newValue = initLatentsProperValue * maskValue + latentsValue * (1f - maskValue);
+                            result[batch, channel, height, width] = newValue;
+                        }
+                    }
+                }
+            }
+            return result;
+        }
+    }
+}
diff --git a/OnnxStack.StableDiffusion/Registration.cs b/OnnxStack.StableDiffusion/Registration.cs
index 978a282f..46165289 100644
--- a/OnnxStack.StableDiffusion/Registration.cs
+++ b/OnnxStack.StableDiffusion/Registration.cs
@@ -42,6 +42,7 @@ public static void AddOnnxStackStableDiffusion(this IServiceCollection serviceCo
 
             //LatentConsistency
             serviceCollection.AddSingleton<IDiffuser, LatentConsistency.TextDiffuser>();
             serviceCollection.AddSingleton<IDiffuser, LatentConsistency.ImageDiffuser>();
+            serviceCollection.AddSingleton<IDiffuser, LatentConsistency.InpaintLegacyDiffuser>();
 
         }
diff --git a/OnnxStack.UI/appsettings.json b/OnnxStack.UI/appsettings.json
index 21d266c9..327151aa 100644
--- a/OnnxStack.UI/appsettings.json
+++ b/OnnxStack.UI/appsettings.json
@@ -70,7 +70,8 @@
         "PipelineType": "LatentConsistency",
         "Diffusers": [
           "TextToImage",
-          "ImageToImage"
+          "ImageToImage",
+          "ImageInpaintLegacy"
         ],
         "ModelFiles": [
           "https://huggingface.co/TheyCallMeHex/LCM-Dreamshaper-V7-ONNX/resolve/main/tokenizer/model.onnx",
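
Usage note (not part of the diff above): a minimal sketch of how the new ImageInpaintLegacy path might be exercised once the LCM-Dreamshaper-V7 model set is loaded. The option values mirror what the diffuser actually reads (InputImage, InputImageMask, Strength, InferenceSteps, GuidanceScale); the service entry point, GenerateAsImageAsync method, InputImage construction, and variable names are assumptions for illustration only and may differ from the real OnnxStack API surface.

// Hypothetical usage sketch; anything not shown in the diff above is an assumed name.
var promptOptions = new PromptOptions
{
    Prompt = "A photo of a cat sitting on a park bench",
    DiffuserType = DiffuserType.ImageInpaintLegacy,   // routes to the new LatentConsistency InpaintLegacyDiffuser
    InputImage = new InputImage("input.png"),         // source image (assumed constructor)
    InputImageMask = new InputImage("mask.png")       // mask image; as implemented, opaque mask pixels are the region that gets repainted
};

var schedulerOptions = new SchedulerOptions
{
    InferenceSteps = 6,    // LCM runs with very few steps
    GuidanceScale = 1f,
    Strength = 0.6f        // GetTimesteps trims the schedule by this amount, same as ImageToImage
};

// stableDiffusionService / GenerateAsImageAsync / modelOptions are assumed entry points for illustration.
var image = await stableDiffusionService.GenerateAsImageAsync(modelOptions, promptOptions, schedulerOptions);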