Skip to content

Commit ba8f0b0

Browse files
committed
add DepthwiseConv2D (深度可分离卷积, depthwise separable convolution)
1 parent 43c3705 commit ba8f0b0

File tree

9 files changed

+425
-1
lines changed

9 files changed

+425
-1
lines changed

src/TensorFlowNET.Core/Eager/EagerRunner.RecordGradient.cs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,11 @@ BackwardFunction GetGradientFunction(string op_name,
8080
Tensor[] op_outputs)
8181
=> (out_grads, unneeded_gradients) =>
8282
{
83+
if(!ops.gradientFunctions.ContainsKey(op_name))
84+
{
85+
throw new Exception($"gradientFunctions not find op_name: {op_name}");
86+
}
87+
8388
if (ops.gradientFunctions[op_name] == null)
8489
return new Tensor[op_inputs.Length];
8590

src/TensorFlowNET.Core/Gradients/nn_grad.cs

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -229,6 +229,37 @@ public static Tensor[] _Conv2DGrad(Operation op, Tensor[] grads)
229229
};
230230
}
231231

232+
/// <summary>
/// Gradient function for DepthwiseConv2dNative.
/// Produces the gradients with respect to the input and the depthwise filter.
/// </summary>
/// <param name="op">The forward DepthwiseConv2dNative operation.</param>
/// <param name="grads">Incoming gradients w.r.t. the op's outputs; only grads[0] is used.</param>
/// <returns>Two tensors: d(loss)/d(input) and d(loss)/d(filter), in input order.</returns>
[RegisterGradient("DepthwiseConv2dNative")]
public static Tensor[] _DepthwiseConv2DGrad(Operation op, Tensor[] grads)
{
    // Read the forward op's attributes back so the backprop ops are
    // configured identically to the forward pass.
    var dilations = op.get_attr_list<int>("dilations");
    var strides = op.get_attr_list<int>("strides");
    var padding = op.get_attr<string>("padding");
    var explicit_paddings = op.get_attr_list<int>("explicit_paddings");
    var data_format = op.get_attr<string>("data_format");
    // shape[0] is the runtime shape of the input, shape[1] that of the filter.
    var shape = gen_array_ops.shape_n(new Tensor[] { op.inputs[0], op.inputs[1] });

    return new Tensor[]
    {
        // Gradient w.r.t. the input tensor.
        gen_nn_ops.depthwise_conv2d_native_backprop_input(
            shape[0], op.inputs[1], grads[0],
            strides, padding, explicit_paddings,
            dilations: dilations,
            data_format: data_format),
        // Gradient w.r.t. the depthwise filter.
        gen_nn_ops.depthwise_conv2d_native_backprop_filter(op.inputs[0], shape[1], grads[0],
            strides, padding,
            dilations: dilations,
            explicit_paddings: explicit_paddings,
            data_format: data_format)
    };
}
262+
232263
[RegisterGradient("FusedBatchNorm")]
233264
public static Tensor[] _FusedBatchNormGrad(Operation op, Tensor[] grads)
234265
=> _BaseFusedBatchNormGrad(op, 0, grads);

src/TensorFlowNET.Core/Keras/Layers/ILayersApi.cs

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,19 @@ public ILayer Conv2D(int filters,
9595
bool use_bias = true,
9696
string kernel_initializer = "glorot_uniform",
9797
string bias_initializer = "zeros");
98+
/// <summary>
/// Depthwise separable 2D convolution layer: applies one convolution filter
/// per input channel, producing `in_channels * depth_multiplier` output channels.
/// </summary>
/// <param name="kernel_size">Spatial size of the depthwise kernel.</param>
/// <param name="strides">Strides of the convolution.</param>
/// <param name="padding">"valid" or "same".</param>
/// <param name="data_format">"channels_last" or "channels_first".</param>
/// <param name="dilation_rate">Dilation rate for dilated (atrous) convolution.</param>
/// <param name="depth_multiplier">Number of depthwise output channels per input channel.</param>
/// <param name="depthwise_initializer">Initializer for the depthwise kernel.</param>
// NOTE(review): use_bias defaults to false here, unlike Keras' DepthwiseConv2D
// which defaults it to true — confirm this is intentional.
public ILayer DepthwiseConv2D(Shape kernel_size = null,
    Shape strides = null,
    string padding = "valid",
    string data_format = null,
    Shape dilation_rate = null,
    int groups = 1,
    int depth_multiplier = 1,
    string activation = null,
    bool use_bias = false,
    string kernel_initializer = "glorot_uniform",
    string bias_initializer = "zeros",
    string depthwise_initializer = "glorot_uniform"
    );
98111

99112
public ILayer Dense(int units);
100113
public ILayer Dense(int units,

src/TensorFlowNET.Core/Tensors/tensor_util.cs

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -249,6 +249,9 @@ public static TensorProto make_tensor_proto(object values, TF_DataType dtype = T
249249
case sbyte val:
250250
tensor_proto.IntVal.AddRange(new[] { (int)val });
251251
break;
252+
case byte val:
253+
tensor_proto.IntVal.AddRange(new[] { (int)val });
254+
break;
252255
case int val:
253256
tensor_proto.IntVal.AddRange(new[] { val });
254257
break;
@@ -262,7 +265,7 @@ public static TensorProto make_tensor_proto(object values, TF_DataType dtype = T
262265
tensor_proto.DoubleVal.AddRange(new[] { val });
263266
break;
264267
default:
265-
throw new Exception("make_tensor_proto Not Implemented");
268+
throw new Exception($"make_tensor_proto Not Implemented {values.GetType().Name}");
266269
}
267270
}
268271

Lines changed: 167 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,167 @@
1+
using System;
using System.Collections.Generic;
using System.Linq;
using System.Security.Cryptography;
using System.Text;
using Newtonsoft.Json;
using Tensorflow.Common.Types;
using Tensorflow.Keras.ArgsDefinition;
using Tensorflow.Keras.Saving;
using Tensorflow.Keras.Utils;
using Tensorflow.Operations;
12+
13+
namespace Tensorflow.Keras.Layers
14+
{
15+
/// <summary>
/// Arguments for the <see cref="DepthwiseConv2D"/> layer, extending the
/// regular Conv2D arguments with depthwise-specific options.
/// </summary>
public class DepthwiseConv2DArgs: Conv2DArgs
{
    /// <summary>
    /// depth_multiplier: The number of depthwise convolution output channels for
    /// each input channel. The total number of depthwise convolution output
    /// channels will be equal to `filters_in * depth_multiplier`.
    /// </summary>
    [JsonProperty("depth_multiplier")]
    public int DepthMultiplier { get; set; } = 1;

    // Initializer for the depthwise kernel; when null the layer falls back
    // to the inherited KernelInitializer.
    [JsonProperty("depthwise_initializer")]
    public IInitializer DepthwiseInitializer { get; set; }
}
28+
29+
/// <summary>
/// Depthwise 2D convolution: applies a single convolutional filter per input
/// channel, producing `in_channels * depth_multiplier` output channels.
/// </summary>
public class DepthwiseConv2D : Conv2D
{
    /// <summary>
    /// The number of depthwise convolution output channels for each input
    /// channel. The total number of depthwise convolution output channels
    /// will be equal to `filters_in * depth_multiplier`.
    /// </summary>
    int DepthMultiplier = 1;

    // Initializer for the depthwise kernel; when null, kernel_initializer is used.
    IInitializer DepthwiseInitializer;

    // Strides normalized to the full input rank (e.g. {1, sH, sW, 1} for NHWC).
    int[] strides;

    // Dilation rates normalized to the full input rank.
    int[] dilation_rate;

    // Maps the Keras data_format string to the op-level format string.
    string getDataFormat()
    {
        return data_format == "channels_first" ? "NCHW" : "NHWC";
    }

    // Counter used to generate unique default layer names.
    static int _id = 1;

    public DepthwiseConv2D(DepthwiseConv2DArgs args) : base(args)
    {
        args.Padding = args.Padding.ToUpper();

        // FIX: increment the counter so successive unnamed layers receive
        // distinct names (the original never incremented _id, so every
        // unnamed instance was called "DepthwiseConv2D_1").
        if (string.IsNullOrEmpty(args.Name))
            name = "DepthwiseConv2D_" + _id++;

        this.DepthMultiplier = args.DepthMultiplier;
        this.DepthwiseInitializer = args.DepthwiseInitializer;
    }

    /// <summary>
    /// Creates the depthwise kernel (and optional bias) weights once the
    /// input shape is known.
    /// </summary>
    public override void build(KerasShapesWrapper input_shape)
    {
        var shape = input_shape.ToSingleShape();

        // Locate the channel axis for the configured data format.
        int channel_axis = data_format == "channels_first" ? 1 : -1;
        var input_channel = channel_axis < 0 ?
            shape.dims[shape.ndim + channel_axis] :
            shape.dims[channel_axis];

        var arg = args as DepthwiseConv2DArgs;

        // Normalize strides to the input rank.
        if (arg.Strides.ndim != shape.ndim)
        {
            if (arg.Strides.ndim == 2)
            {
                this.strides = new int[] { 1, (int)arg.Strides[0], (int)arg.Strides[1], 1 };
            }
            else
            {
                this.strides = conv_utils.normalize_tuple(new int[] { (int)arg.Strides[0] }, shape.ndim, "strides");
            }
        }
        else
        {
            this.strides = arg.Strides.dims.Select(o => (int)o).ToArray();
        }

        // Normalize dilation rates to the input rank.
        if (arg.DilationRate.ndim != shape.ndim)
        {
            this.dilation_rate = conv_utils.normalize_tuple(new int[] { (int)arg.DilationRate[0] }, shape.ndim, "dilation_rate");
        }
        else
        {
            // FIX: the original left dilation_rate null on this branch, which
            // was then passed to depthwise_conv2d_native in Call.
            this.dilation_rate = arg.DilationRate.dims.Select(o => (int)o).ToArray();
        }

        // FIX: the original read shape[0] (the batch dimension) for
        // channels_first; the channel count was already computed above for
        // both formats as input_channel (identical to the original for
        // channels_last, where it read the last dimension).
        long channel_data = input_channel;

        // Depthwise kernel shape: (kH, kW, in_channels, depth_multiplier).
        var depthwise_kernel_shape = this.kernel_size.dims.concat(new long[] {
            channel_data,
            this.DepthMultiplier
        });

        this.kernel = this.add_weight(
            shape: depthwise_kernel_shape,
            initializer: this.DepthwiseInitializer != null ? this.DepthwiseInitializer : this.kernel_initializer,
            name: "depthwise_kernel",
            trainable: true,
            dtype: DType,
            regularizer: this.kernel_regularizer
        );

        // NOTE(review): the input spec always constrains axis -1, even for
        // channels_first — confirm against the channels_first code path.
        var axes = new Dictionary<int, int>();
        axes.Add(-1, (int)input_channel);
        inputSpec = new InputSpec(min_ndim: rank + 2, axes: axes);

        if (use_bias)
        {
            // FIX: a depthwise conv produces in_channels * depth_multiplier
            // output channels, so the bias must cover all of them
            // (identical to the original when DepthMultiplier == 1, the default).
            bias = add_weight(name: "bias",
                shape: ((int)(channel_data * this.DepthMultiplier)),
                initializer: bias_initializer,
                trainable: true,
                dtype: DType);
        }

        built = true;
        _buildInputShape = input_shape;
    }

    /// <summary>
    /// Runs the depthwise convolution, then the optional bias add and activation.
    /// </summary>
    protected override Tensors Call(Tensors inputs, Tensors state = null,
        bool? training = false, IOptionalArgs? optional_args = null)
    {
        Tensor outputs = gen_nn_ops.depthwise_conv2d_native(
            inputs,
            filter: this.kernel.AsTensor(),
            strides: this.strides,
            padding: this.padding,
            dilations: this.dilation_rate,
            data_format: this.getDataFormat(),
            name: name
        );

        if (use_bias)
        {
            if (data_format == "channels_first")
            {
                throw new NotImplementedException("call channels_first");
            }
            else
            {
                outputs = gen_nn_ops.bias_add(outputs, ops.convert_to_tensor(bias),
                    data_format: this.getDataFormat(), name: name);
            }
        }

        if (activation != null)
            outputs = activation.Apply(outputs);

        return outputs;
    }
}
167+
}

src/TensorFlowNET.Keras/Layers/LayersApi.cs

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -210,6 +210,38 @@ public ILayer Conv2D(int filters,
210210
Activation = keras.activations.GetActivationFromName(activation)
211211
});
212212

213+
/// <summary>
/// Depthwise separable 2D convolution layer: one filter per input channel,
/// yielding `in_channels * depth_multiplier` output channels.
/// </summary>
/// <param name="kernel_size">Spatial size of the depthwise kernel; defaults to (5, 5) when null.</param>
/// <param name="strides">Convolution strides; defaults to 1 when null.</param>
/// <param name="depth_multiplier">Depthwise output channels per input channel.</param>
/// <param name="depthwise_initializer">Initializer for the depthwise kernel;
/// falls back to <paramref name="kernel_initializer"/> when null.</param>
public ILayer DepthwiseConv2D(Shape kernel_size = null,
    Shape strides = null,
    string padding = "valid",
    string data_format = null,
    Shape dilation_rate = null,
    int groups = 1,
    int depth_multiplier = 1,
    string activation = null,
    bool use_bias = false,
    string kernel_initializer = "glorot_uniform",
    string bias_initializer = "zeros",
    string depthwise_initializer = "glorot_uniform"
    )
    => new DepthwiseConv2D(new DepthwiseConv2DArgs
    {
        Rank = 2,
        // Filters is unused by the depthwise kernel shape (DepthMultiplier
        // takes its role), so it is pinned to 1 here.
        Filters = 1,
        KernelSize = (kernel_size == null) ? (5, 5) : kernel_size,
        Strides = strides == null ? (1) : strides,
        Padding = padding,
        DepthMultiplier = depth_multiplier,
        DataFormat = data_format,
        DilationRate = dilation_rate == null ? (1) : dilation_rate,
        Groups = groups,
        UseBias = use_bias,
        KernelInitializer = GetInitializerByName(kernel_initializer),
        DepthwiseInitializer = GetInitializerByName(depthwise_initializer == null ? kernel_initializer : depthwise_initializer),
        BiasInitializer = GetInitializerByName(bias_initializer),
        Activation = keras.activations.GetActivationFromName(activation),
    });
243+
244+
213245
/// <summary>
214246
/// Transposed convolution layer (sometimes called Deconvolution).
215247
/// </summary>

test/TensorFlowNET.Keras.UnitTest/EagerModeTestBase.cs

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,40 @@ public bool Equal(float[] f1, float[] f2)
3333
return ret;
3434
}
3535

36+
37+
/// <summary>
/// Asserts that two int arrays have the same length and are element-wise equal.
/// </summary>
/// <param name="f1">Expected values.</param>
/// <param name="f2">Actual values.</param>
public void AssertArray(int[] f1, int[] f2)
{
    // FIX: compare lengths first — the original indexed f2 using f1's length
    // (IndexOutOfRange when f2 is shorter, silent pass when f1 is shorter)
    // and initialized ret = false, so two equal EMPTY arrays failed.
    bool ret = f1.Length == f2.Length;
    for (var i = 0; ret && i < f1.Length; i++)
    {
        ret = f1[i] == f2[i];
    }

    if (!ret)
    {
        Assert.Fail($"Array not Equal:[{string.Join(",", f1)}] [{string.Join(",", f2)}]");
    }
}
52+
53+
/// <summary>
/// Asserts that two float arrays have the same length and are element-wise
/// equal within a small absolute tolerance (1e-5).
/// </summary>
/// <param name="f1">Expected values.</param>
/// <param name="f2">Actual values.</param>
public void AssertArray(float[] f1, float[] f2)
{
    var tolerance = .00001f;
    // FIX: compare lengths first — the original indexed f2 using f1's length
    // (IndexOutOfRange when f2 is shorter, silent pass when f1 is shorter)
    // and initialized ret = false, so two equal EMPTY arrays failed.
    bool ret = f1.Length == f2.Length;
    for (var i = 0; ret && i < f1.Length; i++)
    {
        // Absolute-tolerance comparison; never use == on floats.
        ret = Math.Abs(f1[i] - f2[i]) <= tolerance;
    }

    if (!ret)
    {
        Assert.Fail($"Array float not Equal:[{string.Join(",", f1)}] [{string.Join(",", f2)}]");
    }
}
69+
3670
public bool Equal(double[] d1, double[] d2)
3771
{
3872
bool ret = false;

0 commit comments

Comments
 (0)