TensorStack-AI · saddam213 · Mar 30, 2024 · Mar 30, 2024
diff --git a/OnnxStack.Console/Examples/BackgroundRemovalImageExample.cs b/OnnxStack.Console/Examples/BackgroundRemovalImageExample.cs
diff --git a/OnnxStack.Console/Examples/BackgroundRemovalVideoExample.cs b/OnnxStack.Console/Examples/BackgroundRemovalVideoExample.cs
diff --git a/OnnxStack.Console/Examples/ControlNetFeatureExample.cs b/OnnxStack.Console/Examples/ControlNetFeatureExample.cs
@@ -35,7 +35,7 @@ public async Task RunAsync()
             var inputImage = await OnnxImage.FromFileAsync("D:\\Repositories\\OnnxStack\\Assets\\Samples\\Img2Img_Start.bmp");
 
             // Create Annotation pipeline
-            var annotationPipeline = FeatureExtractorPipeline.CreatePipeline("D:\\Repositories\\controlnet_onnx\\annotators\\depth.onnx", true);
+            var annotationPipeline = FeatureExtractorPipeline.CreatePipeline("D:\\Repositories\\controlnet_onnx\\annotators\\depth.onnx", sampleSize: 512, normalizeOutputTensor: true);
 
             // Create Depth Image
             var controlImage = await annotationPipeline.RunAsync(inputImage);

diff --git a/OnnxStack.Console/Examples/FeatureExtractorExample.cs b/OnnxStack.Console/Examples/FeatureExtractorExample.cs
@@ -37,10 +37,8 @@ public async Task RunAsync()
             {
                 FeatureExtractorPipeline.CreatePipeline("D:\\Repositories\\controlnet_onnx\\annotators\\canny.onnx"),
                 FeatureExtractorPipeline.CreatePipeline("D:\\Repositories\\controlnet_onnx\\annotators\\hed.onnx"),
-                FeatureExtractorPipeline.CreatePipeline("D:\\Repositories\\controlnet_onnx\\annotators\\depth.onnx", true),
-
-               // FeatureExtractorPipeline.CreatePipeline("D:\\Repositories\\depth-anything-large-hf\\onnx\\model.onnx", normalize: true, sampleSize: 504),
-               // FeatureExtractorPipeline.CreatePipeline("D:\\Repositories\\sentis-MiDaS\\dpt_beit_large_512.onnx", normalize: true, sampleSize: 384),
+                FeatureExtractorPipeline.CreatePipeline("D:\\Repositories\\controlnet_onnx\\annotators\\depth.onnx", sampleSize: 512, normalizeOutputTensor: true, inputResizeMode: ImageResizeMode.Stretch),
+                FeatureExtractorPipeline.CreatePipeline("D:\\Repositories\\RMBG-1.4\\onnx\\model.onnx", sampleSize: 1024, setOutputToInputAlpha: true, inputResizeMode: ImageResizeMode.Stretch)
             };
 
             foreach (var pipeline in pipelines)
@@ -49,7 +47,7 @@ public async Task RunAsync()
                 OutputHelpers.WriteConsole($"Load pipeline`{pipeline.Name}`", ConsoleColor.Cyan);
 
                 // Run Image Pipeline
-                var imageFeature = await pipeline.RunAsync(inputImage);
+                var imageFeature = await pipeline.RunAsync(inputImage.Clone());
 
                 OutputHelpers.WriteConsole($"Generating image", ConsoleColor.Cyan);
 

diff --git a/OnnxStack.Core/Extensions/Extensions.cs b/OnnxStack.Core/Extensions/Extensions.cs
@@ -1,4 +1,5 @@
 using Microsoft.ML.OnnxRuntime;
+using Microsoft.ML.OnnxRuntime.Tensors;
 using OnnxStack.Core.Config;
 using System;
 using System.Collections.Concurrent;
@@ -244,5 +245,26 @@ public static long[] ToLong(this int[] array)
         {
             return Array.ConvertAll(array, Convert.ToInt64);
         }
+
+
+        /// <summary>
+        /// Normalize the data in place using Min-Max scaling so values fall in the range [0, 1].
+        /// A zero range (all values equal) yields 0 for every element instead of NaN from 0/0.
+        /// </summary>
+        /// <param name="values">The values, overwritten with their normalized form.</param>
+        public static void NormalizeMinMax(this Span<float> values)
+        {
+            float min = float.PositiveInfinity, max = float.NegativeInfinity;
+            foreach (var val in values)
+            {
+                if (min > val) min = val;
+                if (max < val) max = val;
+            }
+            var range = max - min;
+            for (var i = 0; i < values.Length; i++)
+            {
+                values[i] = range == 0f ? 0f : (values[i] - min) / range;
+            }
+        }
     }
 }
diff --git a/OnnxStack.Core/Extensions/TensorExtension.cs b/OnnxStack.Core/Extensions/TensorExtension.cs
@@ -286,19 +286,7 @@ public static DenseTensor<float> Repeat(this DenseTensor<float> tensor1, int cou
         /// <param name="tensor">The tensor.</param>
         public static void NormalizeMinMax(this DenseTensor<float> tensor)
         {
-            var values = tensor.Buffer.Span;
-            float min = float.PositiveInfinity, max = float.NegativeInfinity;
-            foreach (var val in values)
-            {
-                if (min > val) min = val;
-                if (max < val) max = val;
-            }
-
-            var range = max - min;
-            for (var i = 0; i < values.Length; i++)
-            {
-                values[i] = (values[i] - min) / range;
-            }
+            tensor.Buffer.Span.NormalizeMinMax();
         }
 
 

diff --git a/OnnxStack.Core/Image/Extensions.cs b/OnnxStack.Core/Image/Extensions.cs
@@ -1,6 +1,7 @@
 using Microsoft.ML.OnnxRuntime.Tensors;
 using SixLabors.ImageSharp;
 using SixLabors.ImageSharp.PixelFormats;
+using SixLabors.ImageSharp.Processing;
 
 namespace OnnxStack.Core.Image
 {
@@ -29,11 +30,27 @@ public static OnnxImage ToImageMask(this DenseTensor<float> imageTensor)
             }
         }
 
+
+        /// <summary>Converts an <see cref="ImageResizeMode"/> to a <see cref="ResizeMode"/>: Stretch maps to Stretch, every other value to Crop.</summary>
+        public static ResizeMode ToResizeMode(this ImageResizeMode resizeMode)
+        {
+            if (resizeMode == ImageResizeMode.Stretch)
+                return ResizeMode.Stretch;
+
+            return ResizeMode.Crop;
+        }
+
     }
 
     public enum ImageNormalizeType
     {
         ZeroToOne = 0,
-        OneToOne = 1,
+        OneToOne = 1
+    }
+
+    public enum ImageResizeMode
+    {
+        Crop = 0, // ToResizeMode maps this (and any unrecognized value) to ResizeMode.Crop; default for Resize and GetImageTensor
+        Stretch = 1 // ToResizeMode maps this to ResizeMode.Stretch
     }
 }
diff --git a/OnnxStack.Core/Image/OnnxImage.cs b/OnnxStack.Core/Image/OnnxImage.cs
@@ -230,15 +230,27 @@ public DenseTensor<float> GetImageTensor(ImageNormalizeType normalizeType = Imag
         /// <param name="normalizeType">Type of the normalize.</param>
         /// <param name="channels">The channels.</param>
         /// <returns></returns>
-        public DenseTensor<float> GetImageTensor(int height, int width, ImageNormalizeType normalizeType = ImageNormalizeType.OneToOne, int channels = 3)
+        public DenseTensor<float> GetImageTensor(int height, int width, ImageNormalizeType normalizeType = ImageNormalizeType.OneToOne, int channels = 3, ImageResizeMode resizeMode = ImageResizeMode.Crop)
         {
             if (height > 0 && width > 0)
-                Resize(height, width);
+                Resize(height, width, resizeMode);
 
             return GetImageTensor(normalizeType, channels);
         }
 
 
+        /// <summary>
+        /// Builds the image tensor on a background task by invoking
+        /// <c>GetImageTensor(normalizeType, channels)</c> through <c>Task.Run</c>.
+        /// </summary>
+        /// <param name="normalizeType">Normalization type forwarded to GetImageTensor.</param>
+        /// <param name="channels">Channel count forwarded to GetImageTensor.</param>
+        /// <returns>A task that completes with the resulting tensor.</returns>
+        public Task<DenseTensor<float>> GetImageTensorAsync(ImageNormalizeType normalizeType = ImageNormalizeType.OneToOne, int channels = 3)
+            // Expression-bodied form; the work is still scheduled via Task.Run.
+            => Task.Run(() => GetImageTensor(normalizeType, channels));
+
+
         /// <summary>
         /// Gets the image as tensor asynchronously.
         /// </summary>
@@ -247,9 +259,9 @@ public DenseTensor<float> GetImageTensor(int height, int width, ImageNormalizeTy
         /// <param name="normalizeType">Type of the normalize.</param>
         /// <param name="channels">The channels.</param>
         /// <returns></returns>
-        public Task<DenseTensor<float>> GetImageTensorAsync(int height, int width, ImageNormalizeType normalizeType = ImageNormalizeType.OneToOne, int channels = 3)
+        public Task<DenseTensor<float>> GetImageTensorAsync(int height, int width, ImageNormalizeType normalizeType = ImageNormalizeType.OneToOne, int channels = 3, ImageResizeMode resizeMode = ImageResizeMode.Crop)
         {
-            return Task.Run(() => GetImageTensor(height, width, normalizeType, channels));
+            return Task.Run(() => GetImageTensor(height, width, normalizeType, channels, resizeMode));
         }
 
 
@@ -259,20 +271,21 @@ public Task<DenseTensor<float>> GetImageTensorAsync(int height, int width, Image
         /// <param name="height">The height.</param>
         /// <param name="width">The width.</param>
         /// <param name="resizeMode">The resize mode.</param>
-        public void Resize(int height, int width, ResizeMode resizeMode = ResizeMode.Crop)
+        public void Resize(int height, int width, ImageResizeMode resizeMode = ImageResizeMode.Crop)
         {
             _imageData.Mutate(x =>
             {
                 x.Resize(new ResizeOptions
                 {
                     Size = new Size(width, height),
-                    Mode = resizeMode,
+                    Mode = resizeMode.ToResizeMode(),
                     Sampler = KnownResamplers.Lanczos8,
                     Compand = true
                 });
             });
         }
 
+
         public OnnxImage Clone()
         {
             return new OnnxImage(_imageData);

diff --git a/OnnxStack.FeatureExtractor/Common/FeatureExtractorModel.cs b/OnnxStack.FeatureExtractor/Common/FeatureExtractorModel.cs
@@ -1,56 +1,52 @@
 using Microsoft.ML.OnnxRuntime;
 using OnnxStack.Core.Config;
+using OnnxStack.Core.Image;
 using OnnxStack.Core.Model;
 
 namespace OnnxStack.FeatureExtractor.Common
 {
     public class FeatureExtractorModel : OnnxModelSession
     {
-        private readonly int _sampleSize;
-        private readonly bool _normalize;
-        private readonly int _channels;
+        private readonly FeatureExtractorModelConfig _configuration;
 
         public FeatureExtractorModel(FeatureExtractorModelConfig configuration)
             : base(configuration)
         {
-            _sampleSize = configuration.SampleSize;
-            _normalize = configuration.Normalize;
-            _channels = configuration.Channels;
+            _configuration = configuration;
         }
 
-        public int SampleSize => _sampleSize;
-
-        public bool Normalize => _normalize;
-
-        public int Channels => _channels;
+        public int OutputChannels => _configuration.OutputChannels;
+        public int SampleSize => _configuration.SampleSize;
+        public bool NormalizeOutputTensor => _configuration.NormalizeOutputTensor;
+        public bool SetOutputToInputAlpha => _configuration.SetOutputToInputAlpha;
+        public ImageResizeMode InputResizeMode => _configuration.InputResizeMode;
+        public ImageNormalizeType InputNormalization => _configuration.NormalizeInputTensor;
 
         public static FeatureExtractorModel Create(FeatureExtractorModelConfig configuration)
         {
             return new FeatureExtractorModel(configuration);
         }
 
-        public static FeatureExtractorModel Create(string modelFile, bool normalize = false, int sampleSize = 512, int channels = 3, int deviceId = 0, ExecutionProvider executionProvider = ExecutionProvider.DirectML)
+        public static FeatureExtractorModel Create(string modelFile, int sampleSize = 0, int outputChannels = 1, bool normalizeOutputTensor = false, ImageNormalizeType normalizeInputTensor = ImageNormalizeType.ZeroToOne, ImageResizeMode inputResizeMode = ImageResizeMode.Crop, bool setOutputToInputAlpha = false, int deviceId = 0, ExecutionProvider executionProvider = ExecutionProvider.DirectML)
         {
             var configuration = new FeatureExtractorModelConfig
             {
-                SampleSize = sampleSize,
-                Normalize = normalize,
-                Channels = channels,
                 DeviceId = deviceId,
                 ExecutionProvider = executionProvider,
                 ExecutionMode = ExecutionMode.ORT_SEQUENTIAL,
                 InterOpNumThreads = 0,
                 IntraOpNumThreads = 0,
-                OnnxModelPath = modelFile
+                OnnxModelPath = modelFile,
+
+
+                SampleSize = sampleSize,
+                OutputChannels = outputChannels,
+                NormalizeOutputTensor = normalizeOutputTensor,
+                SetOutputToInputAlpha = setOutputToInputAlpha,
+                NormalizeInputTensor = normalizeInputTensor,
+                InputResizeMode = inputResizeMode
             };
             return new FeatureExtractorModel(configuration);
         }
     }
-
-    public record FeatureExtractorModelConfig : OnnxModelConfig
-    {
-        public int SampleSize { get; set; }
-        public bool Normalize { get; set; }
-        public int Channels { get; set; }
-    }
 }
diff --git a/OnnxStack.FeatureExtractor/Common/FeatureExtractorModelConfig.cs b/OnnxStack.FeatureExtractor/Common/FeatureExtractorModelConfig.cs
@@ -0,0 +1,15 @@
+using OnnxStack.Core.Config;
+using OnnxStack.Core.Image;
+
+namespace OnnxStack.FeatureExtractor.Common
+{
+    public record FeatureExtractorModelConfig : OnnxModelConfig
+    { // Settings exposed through the read-only properties of FeatureExtractorModel.
+        public int SampleSize { get; set; } // FeatureExtractorModel.Create defaults this to 0; examples pass 512 / 1024 (presumably the model input size — confirm)
+        public int OutputChannels { get; set; } // FeatureExtractorModel.Create defaults this to 1
+        public bool NormalizeOutputTensor { get; set; } // FeatureExtractorModel.Create defaults this to false
+        public bool SetOutputToInputAlpha { get; set; } // FeatureExtractorModel.Create defaults this to false
+        public ImageResizeMode InputResizeMode { get; set; } // FeatureExtractorModel.Create defaults this to ImageResizeMode.Crop
+        public ImageNormalizeType NormalizeInputTensor { get; set; } // exposed as FeatureExtractorModel.InputNormalization; Create defaults this to ZeroToOne
+    }
+}