Skip to content

fix: add the momentum parameter's implementation of SGD #1161

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jul 29, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/TensorFlowNET.Core/Keras/IOptimizerApi.cs
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,6 @@ IOptimizer RMSprop(float learning_rate = 0.001f,
bool centered = false,
string name = "RMSprop");

/// <summary>
/// Returns an SGD optimizer configured with the given learning rate.
/// </summary>
/// <param name="learning_rate">Learning rate handed to the optimizer.</param>
/// <returns>The optimizer, exposed as <see cref="IOptimizer"/>.</returns>
IOptimizer SGD(float learning_rate);
/// <summary>
/// Returns an SGD optimizer configured with the given learning rate and momentum.
/// </summary>
/// <param name="learning_rate">Learning rate handed to the optimizer.</param>
/// <param name="momentum">Momentum value handed to the optimizer.</param>
/// <returns>The optimizer, exposed as <see cref="IOptimizer"/>.</returns>
IOptimizer SGD(float learning_rate, float momentum);
}
}
4 changes: 4 additions & 0 deletions src/TensorFlowNET.Core/Training/gen_training_ops.cs
Original file line number Diff line number Diff line change
Expand Up @@ -51,5 +51,9 @@ public static Tensor apply_gradient_descent(IVariableV1 var, Tensor alpha, Tenso
/// <summary>
/// Executes the "ResourceApplyGradientDescent" op through <c>tf.Context</c>.
/// </summary>
/// <param name="var">First op input (presumably the resource handle of the variable to update).</param>
/// <param name="alpha">Second op input (presumably the step size / learning rate).</param>
/// <param name="delta">Third op input (presumably the gradient to apply).</param>
/// <param name="use_locking">Forwarded to the op as its <c>use_locking</c> attribute.</param>
/// <param name="name">Optional op name passed to <c>ExecuteOp</c>.</param>
/// <returns>The result of the op execution.</returns>
public static Tensor resource_apply_gradient_descent(Tensor var, Tensor alpha, Tensor delta, bool use_locking = false, string name = null)
{
    // Inputs are positional: var, alpha, delta. Attributes are passed by member name.
    var op_args = new ExecuteOpArgs(var, alpha, delta)
        .SetAttributes(new { use_locking });

    return tf.Context.ExecuteOp("ResourceApplyGradientDescent", name, op_args);
}

/// <summary>
/// Executes the "ResourceApplyKerasMomentum" op through <c>tf.Context</c>.
/// </summary>
/// <param name="var">First op input (presumably the resource handle of the variable to update).</param>
/// <param name="accum">Second op input (presumably the resource handle of the momentum accumulator).</param>
/// <param name="lr">Third op input (presumably the learning rate).</param>
/// <param name="grad">Fourth op input (presumably the gradient).</param>
/// <param name="momentum">Fifth op input (presumably the momentum coefficient).</param>
/// <param name="use_locking">Forwarded to the op as its <c>use_locking</c> attribute.</param>
/// <param name="use_nesterov">Forwarded to the op as its <c>use_nesterov</c> attribute.</param>
/// <param name="name">Optional op name passed to <c>ExecuteOp</c>.</param>
/// <returns>The result of the op execution.</returns>
public static Tensor resource_apply_keras_momentum(Tensor var, Tensor accum, Tensor lr, Tensor grad, Tensor momentum, bool use_locking = false, bool use_nesterov = false, string name = null)
{
    // Inputs are positional: var, accum, lr, grad, momentum. Attributes are passed by member name.
    var op_args = new ExecuteOpArgs(var, accum, lr, grad, momentum)
        .SetAttributes(new { use_locking, use_nesterov });

    return tf.Context.ExecuteOp("ResourceApplyKerasMomentum", name, op_args);
}
}
}
4 changes: 2 additions & 2 deletions src/TensorFlowNET.Keras/Optimizers/OptimizerApi.cs
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ public IOptimizer RMSprop(float learning_rate = 0.001f,
Name = name
});

/// <summary>
/// Builds an <c>SGD</c> optimizer from the given learning rate.
/// </summary>
/// <param name="learning_rate">Learning rate passed to the <c>SGD</c> constructor.</param>
/// <returns>The newly constructed optimizer.</returns>
public IOptimizer SGD(float learning_rate)
{
    return new SGD(learning_rate);
}
/// <summary>
/// Builds an <c>SGD</c> optimizer from the given learning rate and momentum.
/// </summary>
/// <param name="learning_rate">Learning rate passed to the <c>SGD</c> constructor.</param>
/// <param name="momentum">Momentum passed to the <c>SGD</c> constructor.</param>
/// <returns>The newly constructed optimizer.</returns>
public IOptimizer SGD(float learning_rate, float momentum)
{
    return new SGD(learning_rate, momentum);
}
}
}
19 changes: 18 additions & 1 deletion src/TensorFlowNET.Keras/Optimizers/SGD.cs
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ public SGD(float learning_rate,
_set_hyper("decay", decay);

_momentum = momentum > 0;
if (momentum < 0 || momentum > 1)
throw new ValueError($"momentum must be a number between 0 and 1, got {momentum}.");

_set_hyper("momentum", momentum);

Expand All @@ -30,6 +32,13 @@ public SGD(float learning_rate,
#pragma warning restore CS1717 // Assignment made to same variable
}

/// <summary>
/// Adds a "momentum" slot for every variable in <paramref name="var_list"/>
/// when the <c>_momentum</c> flag is set; otherwise does nothing.
/// </summary>
/// <param name="var_list">Variables that each receive a "momentum" slot.</param>
protected override void _create_slots(IVariableV1[] var_list)
{
    // No slots are added while the momentum flag is off.
    if (!_momentum)
    {
        return;
    }

    foreach (var variable in var_list)
    {
        add_slot(variable, "momentum");
    }
}

protected override void _prepare_local(DeviceDType device_dtype,
Dictionary<DeviceDType, Dictionary<string, Tensor>> _apply_state)
{
Expand All @@ -43,7 +52,15 @@ protected override Operation _resource_apply_dense(IVariableV1 var, Tensor grad,
{
if (_momentum)
{
throw new NotImplementedException("_resource_apply_dense");
var momentum_var = get_slot(var, "momentum");
return gen_training_ops.resource_apply_keras_momentum(
var.Handle,
momentum_var.Handle,
_get_hyper("learning_rate", var.dtype),
grad,
_get_hyper("momentum", var.dtype),
use_locking: _use_locking,
use_nesterov: nesterov);
}
var device_dtype = _apply_state.Keys.FirstOrDefault(x => x.Device == var.Device && x.DType == var.dtype.as_base_dtype());

Expand Down