diff --git a/src/nf/nf_conv1d_layer.f90 b/src/nf/nf_conv1d_layer.f90 index c39b11fc..871eef02 100644 --- a/src/nf/nf_conv1d_layer.f90 +++ b/src/nf/nf_conv1d_layer.f90 @@ -32,8 +32,10 @@ module nf_conv1d_layer procedure :: forward procedure :: backward procedure :: get_gradients + procedure :: get_gradients_ptr procedure :: get_num_params procedure :: get_params + procedure :: get_params_ptr procedure :: init procedure :: set_params @@ -97,6 +99,16 @@ module function get_params(self) result(params) !! Parameters to get end function get_params + module subroutine get_params_ptr(self, w_ptr, b_ptr) + !! Return pointers to the parameters (weights and biases) of this layer. + class(conv1d_layer), intent(in), target :: self + !! A `conv1d_layer` instance + real, pointer, intent(out) :: w_ptr(:) + !! Pointer to the kernel weights (flattened) + real, pointer, intent(out) :: b_ptr(:) + !! Pointer to the biases + end subroutine get_params_ptr + module function get_gradients(self) result(gradients) !! Return the gradients of this layer. !! The gradients are ordered as weights first, biases second. @@ -106,6 +118,16 @@ module function get_gradients(self) result(gradients) !! Gradients to get end function get_gradients + module subroutine get_gradients_ptr(self, dw_ptr, db_ptr) + !! Return pointers to the gradients of this layer. + class(conv1d_layer), intent(in), target :: self + !! A `conv1d_layer` instance + real, pointer, intent(out) :: dw_ptr(:) + !! Pointer to the kernel weight gradients (flattened) + real, pointer, intent(out) :: db_ptr(:) + !! Pointer to the bias gradients + end subroutine get_gradients_ptr + module subroutine set_params(self, params) !! Set the parameters of the layer. 
class(conv1d_layer), intent(in out) :: self diff --git a/src/nf/nf_conv1d_layer_submodule.f90 b/src/nf/nf_conv1d_layer_submodule.f90 index 5404b9c7..05bcde70 100644 --- a/src/nf/nf_conv1d_layer_submodule.f90 +++ b/src/nf/nf_conv1d_layer_submodule.f90 @@ -152,6 +152,14 @@ module function get_params(self) result(params) params = [ w_, self % biases] end function get_params + module subroutine get_params_ptr(self, w_ptr, b_ptr) + class(conv1d_layer), intent(in), target :: self + real, pointer, intent(out) :: w_ptr(:) + real, pointer, intent(out) :: b_ptr(:) + w_ptr(1:size(self % kernel)) => self % kernel + b_ptr => self % biases + end subroutine get_params_ptr + module function get_gradients(self) result(gradients) class(conv1d_layer), intent(in), target :: self real, allocatable :: gradients(:) @@ -160,6 +168,14 @@ module function get_gradients(self) result(gradients) gradients = [ dw_, self % db ] end function get_gradients + module subroutine get_gradients_ptr(self, dw_ptr, db_ptr) + class(conv1d_layer), intent(in), target :: self + real, pointer, intent(out) :: dw_ptr(:) + real, pointer, intent(out) :: db_ptr(:) + dw_ptr(1:size(self % dw)) => self % dw + db_ptr => self % db + end subroutine get_gradients_ptr + module subroutine set_params(self, params) class(conv1d_layer), intent(in out) :: self real, intent(in) :: params(:) diff --git a/src/nf/nf_conv2d_layer.f90 b/src/nf/nf_conv2d_layer.f90 index 4b79376e..3f7b28db 100644 --- a/src/nf/nf_conv2d_layer.f90 +++ b/src/nf/nf_conv2d_layer.f90 @@ -33,8 +33,10 @@ module nf_conv2d_layer procedure :: forward procedure :: backward procedure :: get_gradients + procedure :: get_gradients_ptr procedure :: get_num_params procedure :: get_params + procedure :: get_params_ptr procedure :: init procedure :: set_params @@ -98,6 +100,16 @@ module function get_params(self) result(params) !! Parameters to get end function get_params + module subroutine get_params_ptr(self, w_ptr, b_ptr) + !! 
Return pointers to the parameters (weights and biases) of this layer. + class(conv2d_layer), intent(in), target :: self + !! A `conv2d_layer` instance + real, pointer, intent(out) :: w_ptr(:) + !! Pointer to the kernel weights (flattened) + real, pointer, intent(out) :: b_ptr(:) + !! Pointer to the biases + end subroutine get_params_ptr + module function get_gradients(self) result(gradients) !! Return the gradients of this layer. !! The gradients are ordered as weights first, biases second. @@ -107,6 +119,16 @@ module function get_gradients(self) result(gradients) !! Gradients to get end function get_gradients + module subroutine get_gradients_ptr(self, dw_ptr, db_ptr) + !! Return pointers to the gradients of this layer. + class(conv2d_layer), intent(in), target :: self + !! A `conv2d_layer` instance + real, pointer, intent(out) :: dw_ptr(:) + !! Pointer to the kernel weight gradients (flattened) + real, pointer, intent(out) :: db_ptr(:) + !! Pointer to the bias gradients + end subroutine get_gradients_ptr + module subroutine set_params(self, params) !! Set the parameters of the layer. 
class(conv2d_layer), intent(in out) :: self diff --git a/src/nf/nf_conv2d_layer_submodule.f90 b/src/nf/nf_conv2d_layer_submodule.f90 index 45a2c1da..b617ec34 100644 --- a/src/nf/nf_conv2d_layer_submodule.f90 +++ b/src/nf/nf_conv2d_layer_submodule.f90 @@ -204,6 +204,15 @@ module function get_params(self) result(params) end function get_params + + module subroutine get_params_ptr(self, w_ptr, b_ptr) + class(conv2d_layer), intent(in), target :: self + real, pointer, intent(out) :: w_ptr(:) + real, pointer, intent(out) :: b_ptr(:) + w_ptr(1:size(self % kernel)) => self % kernel + b_ptr => self % biases + end subroutine get_params_ptr + module function get_gradients(self) result(gradients) class(conv2d_layer), intent(in), target :: self @@ -221,6 +230,15 @@ module function get_gradients(self) result(gradients) end function get_gradients + module subroutine get_gradients_ptr(self, dw_ptr, db_ptr) + class(conv2d_layer), intent(in), target :: self + real, pointer, intent(out) :: dw_ptr(:) + real, pointer, intent(out) :: db_ptr(:) + dw_ptr(1:size(self % dw)) => self % dw + db_ptr => self % db + end subroutine get_gradients_ptr + + module subroutine set_params(self, params) class(conv2d_layer), intent(in out) :: self real, intent(in) :: params(:) diff --git a/src/nf/nf_dense_layer.f90 b/src/nf/nf_dense_layer.f90 index 862f4cdf..a55ec892 100644 --- a/src/nf/nf_dense_layer.f90 +++ b/src/nf/nf_dense_layer.f90 @@ -34,8 +34,10 @@ module nf_dense_layer procedure :: backward procedure :: forward procedure :: get_gradients + procedure :: get_gradients_ptr procedure :: get_num_params procedure :: get_params + procedure :: get_params_ptr procedure :: init procedure :: set_params @@ -96,6 +98,12 @@ module function get_params(self) result(params) !! 
Parameters of this layer end function get_params + module subroutine get_params_ptr(self, w_ptr, b_ptr) + class(dense_layer), intent(in), target :: self + real, pointer, intent(out) :: w_ptr(:) + real, pointer, intent(out) :: b_ptr(:) + end subroutine get_params_ptr + module function get_gradients(self) result(gradients) !! Return the gradients of this layer. !! The gradients are ordered as weights first, biases second. @@ -105,6 +113,12 @@ module function get_gradients(self) result(gradients) !! Gradients of this layer end function get_gradients + module subroutine get_gradients_ptr(self, dw_ptr, db_ptr) + class(dense_layer), intent(in), target :: self + real, pointer, intent(out) :: dw_ptr(:) + real, pointer, intent(out) :: db_ptr(:) + end subroutine get_gradients_ptr + module subroutine set_params(self, params) !! Set the parameters of this layer. !! The parameters are ordered as weights first, biases second. diff --git a/src/nf/nf_dense_layer_submodule.f90 b/src/nf/nf_dense_layer_submodule.f90 index a424cf9c..bb27c54a 100644 --- a/src/nf/nf_dense_layer_submodule.f90 +++ b/src/nf/nf_dense_layer_submodule.f90 @@ -77,6 +77,15 @@ module function get_params(self) result(params) end function get_params + module subroutine get_params_ptr(self, w_ptr, b_ptr) + class(dense_layer), intent(in), target :: self + real, pointer, intent(out) :: w_ptr(:) + real, pointer, intent(out) :: b_ptr(:) + w_ptr(1:size(self % weights)) => self % weights + b_ptr => self % biases + end subroutine get_params_ptr + + module function get_gradients(self) result(gradients) class(dense_layer), intent(in), target :: self real, allocatable :: gradients(:) @@ -93,6 +102,15 @@ module function get_gradients(self) result(gradients) end function get_gradients + module subroutine get_gradients_ptr(self, dw_ptr, db_ptr) + class(dense_layer), intent(in), target :: self + real, pointer, intent(out) :: dw_ptr(:) + real, pointer, intent(out) :: db_ptr(:) + dw_ptr(1:size(self % dw)) => self % dw + db_ptr => 
self % db + end subroutine get_gradients_ptr + + module subroutine set_params(self, params) class(dense_layer), intent(in out) :: self real, intent(in), target :: params(:) diff --git a/src/nf/nf_layer.f90 b/src/nf/nf_layer.f90 index 517622b0..b12592f3 100644 --- a/src/nf/nf_layer.f90 +++ b/src/nf/nf_layer.f90 @@ -22,6 +22,7 @@ module nf_layer integer, allocatable :: layer_shape(:) integer, allocatable :: input_layer_shape(:) logical :: initialized = .false. + class(optimizer_base_type), allocatable :: optimizer contains diff --git a/src/nf/nf_locally_connected1d_layer.f90 b/src/nf/nf_locally_connected1d_layer.f90 index beca76d5..6fea2c5c 100644 --- a/src/nf/nf_locally_connected1d_layer.f90 +++ b/src/nf/nf_locally_connected1d_layer.f90 @@ -32,8 +32,10 @@ module nf_locally_connected1d_layer procedure :: forward procedure :: backward procedure :: get_gradients + procedure :: get_gradients_ptr procedure :: get_num_params procedure :: get_params + procedure :: get_params_ptr procedure :: init procedure :: set_params @@ -97,6 +99,12 @@ module function get_params(self) result(params) !! Parameters to get end function get_params + module subroutine get_params_ptr(self, w_ptr, b_ptr) + class(locally_connected1d_layer), intent(in), target :: self + real, pointer, intent(out) :: w_ptr(:) + real, pointer, intent(out) :: b_ptr(:) + end subroutine get_params_ptr + module function get_gradients(self) result(gradients) !! Return the gradients of this layer. !! The gradients are ordered as weights first, biases second. @@ -106,6 +114,12 @@ module function get_gradients(self) result(gradients) !! Gradients to get end function get_gradients + module subroutine get_gradients_ptr(self, dw_ptr, db_ptr) + class(locally_connected1d_layer), intent(in), target :: self + real, pointer, intent(out) :: dw_ptr(:) + real, pointer, intent(out) :: db_ptr(:) + end subroutine get_gradients_ptr + module subroutine set_params(self, params) !! Set the parameters of the layer. 
class(locally_connected1d_layer), intent(in out) :: self diff --git a/src/nf/nf_locally_connected1d_layer_submodule.f90 b/src/nf/nf_locally_connected1d_layer_submodule.f90 index 053c520b..fa6110d5 100644 --- a/src/nf/nf_locally_connected1d_layer_submodule.f90 +++ b/src/nf/nf_locally_connected1d_layer_submodule.f90 @@ -128,12 +128,28 @@ module function get_params(self) result(params) params = [self % kernel, self % biases] end function get_params + module subroutine get_params_ptr(self, w_ptr, b_ptr) + class(locally_connected1d_layer), intent(in), target :: self + real, pointer, intent(out) :: w_ptr(:) + real, pointer, intent(out) :: b_ptr(:) + w_ptr(1:size(self % kernel)) => self % kernel + b_ptr(1:size(self % biases)) => self % biases + end subroutine get_params_ptr + module function get_gradients(self) result(gradients) class(locally_connected1d_layer), intent(in), target :: self real, allocatable :: gradients(:) gradients = [self % dw, self % db] end function get_gradients + module subroutine get_gradients_ptr(self, dw_ptr, db_ptr) + class(locally_connected1d_layer), intent(in), target :: self + real, pointer, intent(out) :: dw_ptr(:) + real, pointer, intent(out) :: db_ptr(:) + dw_ptr(1:size(self % dw)) => self % dw + db_ptr(1:size(self % db)) => self % db + end subroutine get_gradients_ptr + module subroutine set_params(self, params) class(locally_connected1d_layer), intent(in out) :: self real, intent(in) :: params(:) diff --git a/src/nf/nf_network_submodule.f90 b/src/nf/nf_network_submodule.f90 index d8f5ff50..876070bc 100644 --- a/src/nf/nf_network_submodule.f90 +++ b/src/nf/nf_network_submodule.f90 @@ -597,12 +597,26 @@ module subroutine train(self, input_data, output_data, batch_size, & ! If not provided, we default to SGD with its default settings. 
if (present(optimizer)) then self % optimizer = optimizer + + do n = 1, size(self % layers) + self % layers(n) % optimizer = optimizer + end do + else self % optimizer = sgd() + + do n = 1, size(self % layers) + self % layers(n) % optimizer = sgd() + end do + end if call self % optimizer % init(self % get_num_params()) + do n = 1, size(self % layers) + call self % layers(n) % optimizer % init(self % layers(n) % get_num_params()) + end do + ! Passing the loss instance is optional. ! If not provided, we default to quadratic(). if (present(loss)) then @@ -649,6 +663,7 @@ module subroutine update(self, optimizer, batch_size) integer, intent(in), optional :: batch_size integer :: batch_size_ real, allocatable :: params(:) + real, pointer :: weights(:), biases(:), dw(:), db(:) integer :: n ! Passing the optimizer instance is optional. If not provided, and if the @@ -661,10 +676,26 @@ module subroutine update(self, optimizer, batch_size) if (.not. allocated(self % optimizer)) then if (present(optimizer)) then self % optimizer = optimizer + + do n = 1, size(self % layers) + self % layers(n) % optimizer = optimizer + end do + else self % optimizer = sgd() + + do n = 1, size(self % layers) + self % layers(n) % optimizer = sgd() + end do + end if + call self % optimizer % init(self % get_num_params()) + + do n = 1, size(self % layers) + call self % layers(n) % optimizer % init(self % layers(n) % get_num_params()) + end do + end if if (present(batch_size)) then @@ -693,23 +724,34 @@ module subroutine update(self, optimizer, batch_size) end do #endif - params = self % get_params() - call self % optimizer % minimize(params, self % get_gradients() / batch_size_) - call self % set_params(params) - - ! Flush network gradients to zero. 
do n = 2, size(self % layers) select type(this_layer => self % layers(n) % p) type is(dense_layer) + call this_layer % get_params_ptr(weights, biases) + call this_layer % get_gradients_ptr(dw, db) + call self % layers(n) % optimizer % minimize(weights, dw / batch_size_) + call self % layers(n) % optimizer % minimize(biases, db / batch_size_) this_layer % dw = 0 this_layer % db = 0 - type is(conv2d_layer) + type is(conv1d_layer) + call this_layer % get_params_ptr(weights, biases) + call this_layer % get_gradients_ptr(dw, db) + call self % layers(n) % optimizer % minimize(weights, dw / batch_size_) + call self % layers(n) % optimizer % minimize(biases, db / batch_size_) this_layer % dw = 0 this_layer % db = 0 - type is(conv1d_layer) + type is(conv2d_layer) + call this_layer % get_params_ptr(weights, biases) + call this_layer % get_gradients_ptr(dw, db) + call self % layers(n) % optimizer % minimize(weights, dw / batch_size_) + call self % layers(n) % optimizer % minimize(biases, db / batch_size_) this_layer % dw = 0 this_layer % db = 0 type is(locally_connected1d_layer) + call this_layer % get_params_ptr(weights, biases) + call this_layer % get_gradients_ptr(dw, db) + call self % layers(n) % optimizer % minimize(weights, dw / batch_size_) + call self % layers(n) % optimizer % minimize(biases, db / batch_size_) this_layer % dw = 0 this_layer % db = 0 end select diff --git a/src/nf/nf_optimizers.f90 b/src/nf/nf_optimizers.f90 index c64cefed..24089ccd 100644 --- a/src/nf/nf_optimizers.f90 +++ b/src/nf/nf_optimizers.f90 @@ -248,4 +248,4 @@ pure subroutine minimize_adagrad(self, param, gradient) end subroutine minimize_adagrad -end module nf_optimizers +end module nf_optimizers \ No newline at end of file