From e60bdead76c91360019b66ad3594ea9c1b63d3b9 Mon Sep 17 00:00:00 2001 From: "Vandenplas, Jeremie" Date: Sun, 17 May 2020 15:23:37 +0200 Subject: [PATCH 1/7] From corr_dev Squashed commit of the following: commit 9cd75389ecb7309997ca63b7c10e342a2fa34677 Author: Vandenplas, Jeremie Date: Sat May 16 22:48:56 2020 +0200 corr_dev: addition of specs commit 4af1d2de5e1c3afa5f16e3c4ae2dd55d5b8a289c Author: Vandenplas, Jeremie Date: Sat May 16 22:29:42 2020 +0200 corr_dev: addition of tests for csp and int32 commit aef4416e251683d6b72b2e843cf29dacd306f47c Author: Vandenplas, Jeremie Date: Sat May 16 22:29:08 2020 +0200 corr_dev: addition of tests for csp and int32 commit 0e6ec5db5ca1df642d821642e0f8d487e8dfa508 Author: Vandenplas, Jeremie Date: Sat May 16 22:19:56 2020 +0200 corr_dev: clarification commit 1fdabd93928dad69e3633d386a4299350e81b00b Author: Vandenplas, Jeremie Date: Sat May 16 22:14:44 2020 +0200 corr_dev: correction of an issue with complex commit 1632355b8f7b97569ae5cf826cf7fcc478668528 Author: Vandenplas, Jeremie Date: Sat May 16 21:41:12 2020 +0200 corr_dev:completed impl commit 1fe73b1b8eebf8c6c3037a48a5097fc083e5920d Merge: 9da1d97 ce987d2 Author: Vandenplas, Jeremie Date: Sat May 16 13:13:46 2020 +0200 Merge remote-tracking branch 'upstream/master' into corr_dev commit 9da1d97d80c437d675aa16f1cfd2f8858c1cf2f6 Author: Vandenplas, Jeremie Date: Sat May 16 12:46:05 2020 +0200 corr_dev: addition of tests for int64 commit d03d828c251ea8c995e603c521c8ad8d15acdd93 Author: Vandenplas, Jeremie Date: Sat May 16 12:22:13 2020 +0200 corr_dev: addition of test for dp complex numbers commit 7901ee2e7f10eca64afd158e45a86860ee0a1fde Author: Vandenplas, Jeremie Date: Fri May 15 20:18:03 2020 +0200 corr_dev: correction for vector with all elements false commit 2a119c2405a6dec4011d7a6498e39b844687f810 Author: Vandenplas, Jeremie Date: Fri May 15 19:02:12 2020 +0200 corr_dev: done until mask commit 19c1f8e9d83f0d781b8b44518b170f46e0fb7cae Author: Vandenplas, Jeremie Date: Fri May 15 16:42:47 2020 +0200 corr_dev: implemented some correlation functions commit ec16e9c4383601af36cc73e64252dbf6467a04a5 Author: Vandenplas, Jeremie Date: Fri May 15 16:12:13 2020 +0200 corr_dev: init --- doc/specs/stdlib_experimental_stats.md | 48 ++++ src/CMakeLists.txt | 1 + src/stdlib_experimental_stats.fypp | 96 ++++++- src/stdlib_experimental_stats_corr.fypp | 311 ++++++++++++++++++++++ src/tests/stats/CMakeLists.txt | 1 + src/tests/stats/test_corr.f90 | 338 ++++++++++++++++++++++++ 6 files changed, 794 insertions(+), 1 deletion(-) create mode 100644 src/stdlib_experimental_stats_corr.fypp create mode 100644 src/tests/stats/test_corr.f90 diff --git a/doc/specs/stdlib_experimental_stats.md b/doc/specs/stdlib_experimental_stats.md index 62d5cf551..028f1db86 100644 --- a/doc/specs/stdlib_experimental_stats.md +++ b/doc/specs/stdlib_experimental_stats.md @@ -6,6 +6,54 @@ title: experimental_stats [TOC] +## `corr` - Pearson correlation of array elements + +### Description + +Returns the Pearson correlation of the elements of `array` along dimension `dim` if the corresponding element in `mask` is `true`. + +The Pearson correlation between two rows (or columns), say `x` and `y`, of `array` is defined as: + +``` + corr(x, y) = cov(x, y) / sqrt( var(x) * var(y)) +``` + +### Syntax + +`result = corr(array, dim [, mask])` + +### Arguments + +`array`: Shall be a rank-1 or a rank-2 array of type `integer`, `real`, or `complex`. + +`dim`: Shall be a scalar of type `integer` with a value in the range from 1 to `n`, where `n` is the rank of `array`. + +`mask` (optional): Shall be of type `logical` and either a scalar or an array of the same shape as `array`. + +### Return value + +If `array` is of rank 1 and of type `real` or `complex`, the result is of type `real` corresponding to the type of `array`. +If `array` is of rank 2 and of type `real` or `complex`, the result is of the same type as `array`. +If `array` is of type `integer`, the result is of type `real(dp)`. + +If `array` is of rank 1 and of size larger than 1, a scalar equal to 1 is returned. Otherwise, IEEE `NaN` is returned. +If `array` is of rank 2, a rank-2 array with the corresponding correlations is returned. + +If `mask` is specified, the result is the Pearson correlation of all elements of `array` corresponding to `true` elements of `mask`. If every element of `mask` is `false`, the result is IEEE `NaN`. + +### Example + +```fortran +program demo_corr + use stdlib_experimental_stats, only: corr + implicit none + real :: x(1:6) = [ 1., 2., 3., 4., 5., 6. ] + real :: y(1:2, 1:3) = reshape([ -1., 40., -3., 4., 10., 6. ], [ 2, 3]) + print *, corr(x, 1) !returns 1. + print *, corr(y, 2) !returns reshape([ 1., -.32480, -.32480, 1. ], [ 2, 3]) +end program demo_corr +``` + ## `cov` - covariance of array elements ### Description diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 51fd01d62..1d67eba77 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -7,6 +7,7 @@ set(fppFiles stdlib_experimental_linalg_diag.fypp stdlib_experimental_optval.fypp stdlib_experimental_stats.fypp + stdlib_experimental_stats_corr.fypp stdlib_experimental_stats_cov.fypp stdlib_experimental_stats_mean.fypp stdlib_experimental_stats_moment.fypp diff --git a/src/stdlib_experimental_stats.fypp b/src/stdlib_experimental_stats.fypp index e5cb89a2a..e3e6bd747 100644 --- a/src/stdlib_experimental_stats.fypp +++ b/src/stdlib_experimental_stats.fypp @@ -8,7 +8,101 @@ module stdlib_experimental_stats implicit none private ! Public API - public :: cov, mean, moment, var + public :: corr, cov, mean, moment, var + + interface corr + #:for k1, t1 in RC_KINDS_TYPES + #:set RName = rname("corr",1, t1, k1) + module function ${RName}$(x, dim, mask) result(res) + ${t1}$, intent(in) :: x(:) + integer, intent(in) :: dim + logical, intent(in), optional :: mask + real(${k1}$) :: res + end function ${RName}$ + #:endfor + + + #:for k1, t1 in INT_KINDS_TYPES + #:set RName = rname("corr",1, t1, k1, 'dp') + module function ${RName}$(x, dim, mask) result(res) + ${t1}$, intent(in) :: x(:) + integer, intent(in) :: dim + logical, intent(in), optional :: mask + real(dp) :: res + end function ${RName}$ + #:endfor + + + #:for k1, t1 in RC_KINDS_TYPES + #:set RName = rname("corr_mask",1, t1, k1) + module function ${RName}$(x, dim, mask) result(res) + ${t1}$, intent(in) :: x(:) + integer, intent(in) :: dim + logical, intent(in) :: mask(:) + real(${k1}$) :: res + end function ${RName}$ + #:endfor + + + #:for k1, t1 in INT_KINDS_TYPES + #:set RName = rname("corr_mask",1, t1, k1, 'dp') + module function ${RName}$(x, dim, mask) result(res) + ${t1}$, intent(in) :: x(:) + integer, intent(in) :: dim + logical, intent(in) :: mask(:) + real(dp) :: res + end function ${RName}$ + #:endfor + + + #:for k1, t1 in RC_KINDS_TYPES + #:set RName = rname("corr",2, t1, k1) + module function ${RName}$(x, dim, mask) result(res) + ${t1}$, intent(in) :: x(:, :) + integer, intent(in) :: dim + logical, intent(in), optional :: mask + ${t1}$ :: res(merge(size(x, 1), size(x, 2), mask = 1 Date: Sun, 17 May 2020 15:34:14 +0200 Subject: [PATCH 2/7] corr: formatting --- src/stdlib_experimental_stats.fypp | 173 ++++++++++++++--------------- 1 file changed, 84 insertions(+), 89 deletions(-) diff --git a/src/stdlib_experimental_stats.fypp b/src/stdlib_experimental_stats.fypp index e3e6bd747..c2ebdeb4e 100644 --- a/src/stdlib_experimental_stats.fypp +++ b/src/stdlib_experimental_stats.fypp @@ -10,96 +10,91 @@ module stdlib_experimental_stats ! Public API public :: corr, cov, mean, moment, var + interface corr - #:for k1, t1 in RC_KINDS_TYPES - #:set RName = rname("corr",1, t1, k1) - module function ${RName}$(x, dim, mask) result(res) - ${t1}$, intent(in) :: x(:) - integer, intent(in) :: dim - logical, intent(in), optional :: mask - real(${k1}$) :: res - end function ${RName}$ - #:endfor - - - #:for k1, t1 in INT_KINDS_TYPES - #:set RName = rname("corr",1, t1, k1, 'dp') - module function ${RName}$(x, dim, mask) result(res) - ${t1}$, intent(in) :: x(:) - integer, intent(in) :: dim - logical, intent(in), optional :: mask - real(dp) :: res - end function ${RName}$ - #:endfor - - - #:for k1, t1 in RC_KINDS_TYPES - #:set RName = rname("corr_mask",1, t1, k1) - module function ${RName}$(x, dim, mask) result(res) - ${t1}$, intent(in) :: x(:) - integer, intent(in) :: dim - logical, intent(in) :: mask(:) - real(${k1}$) :: res - end function ${RName}$ - #:endfor - - - #:for k1, t1 in INT_KINDS_TYPES - #:set RName = rname("corr_mask",1, t1, k1, 'dp') - module function ${RName}$(x, dim, mask) result(res) - ${t1}$, intent(in) :: x(:) - integer, intent(in) :: dim - logical, intent(in) :: mask(:) - real(dp) :: res - end function ${RName}$ - #:endfor - - - #:for k1, t1 in RC_KINDS_TYPES - #:set RName = rname("corr",2, t1, k1) - module function ${RName}$(x, dim, mask) result(res) - ${t1}$, intent(in) :: x(:, :) - integer, intent(in) :: dim - logical, intent(in), optional :: mask - ${t1}$ :: res(merge(size(x, 1), size(x, 2), mask = 1 Date: Sun, 17 May 2020 16:54:29 +0200 Subject: [PATCH 3/7] corr: correction for dot_product complex --- src/stdlib_experimental_stats_corr.fypp | 4 +- src/tests/stats/test_corr.f90 | 57 +++++++++++++++++++++++-- 2 files changed, 55 insertions(+), 6 deletions(-) diff --git a/src/stdlib_experimental_stats_corr.fypp b/src/stdlib_experimental_stats_corr.fypp index 413ef454c..7924cf194 100644 --- a/src/stdlib_experimental_stats_corr.fypp +++ b/src/stdlib_experimental_stats_corr.fypp @@ -243,7 +243,7 @@ contains #:endif mask_) - res(j, i) = dot_product( centerj_, centeri_)& + res(j, i) = dot_product( centeri_, centerj_)& /sqrt(dot_product( centeri_, centeri_)*& dot_product( centerj_, centerj_)) end do @@ -295,7 +295,7 @@ contains centerj_ = merge( x(j, :) - mean(x(j, :), mask = mask_),& 0._dp, mask_) - res(j, i) = dot_product( centerj_, centeri_)& + res(j, i) = dot_product( centeri_, centerj_)& /sqrt(dot_product( centeri_, centeri_)*& dot_product( centerj_, centerj_)) end do diff --git a/src/tests/stats/test_corr.f90 b/src/tests/stats/test_corr.f90 index b411fa365..e6e77b59b 100644 --- a/src/tests/stats/test_corr.f90 +++ b/src/tests/stats/test_corr.f90 @@ -92,6 +92,15 @@ subroutine test_sp(x, x2) ) < sptol)& , 'sp check 12') + + call check( all(abs(corr(x2, 1, mask = x2 < 1000) - corr(x2, 1))& + < sptol)& + , 'sp check 13') + + call check( all(abs(corr(x2, 2, mask = x2 < 1000) - corr(x2, 2))& + < sptol)& + , 'sp check 14') + end subroutine test_sp subroutine test_dp(x, x2) @@ -145,6 +154,14 @@ subroutine test_dp(x, x2) ) < dptol)& , 'dp check 12') + call check( all(abs(corr(x2, 1, mask = x2 < 1000) - corr(x2, 1))& + < dptol)& + , 'dp check 13') + + call check( all(abs(corr(x2, 2, mask = x2 < 1000) - corr(x2, 2))& + < dptol)& + , 'dp check 14') + end subroutine test_dp subroutine test_int32(x, x2) @@ -198,6 +215,14 @@ subroutine test_int32(x, x2) ) < dptol)& , 'int32 check 12') + call check( all(abs(corr(x2, 1, mask = x2 < 1000) - corr(x2, 1))& + < dptol)& + , 'int32 check 13') + + call check( all(abs(corr(x2, 2, mask = x2 < 1000) - corr(x2, 2))& + < dptol)& + , 'int32 check 14') + end subroutine test_int32 subroutine test_int64(x, x2) @@ -251,6 +276,14 @@ subroutine test_int64(x, x2) ) < dptol)& , 'int64 check 12') + call check( all(abs(corr(x2, 1, mask = x2 < 1000) - corr(x2, 1))& + < dptol)& + , 'int64 check 13') + + call check( all(abs(corr(x2, 2, mask = x2 < 1000) - corr(x2, 2))& + < dptol)& + , 'int64 check 14') + end subroutine test_int64 subroutine test_csp(x, x2) @@ -286,12 +319,20 @@ subroutine test_csp(x, x2) , 'csp check 7') call check( all( abs( corr(x2, 2, mask = aimag(x2) < 6) - reshape([& - (1._sp,0._sp), (0._sp,-1._sp)& - ,(0._sp,1._sp), (1._sp,0._sp)]& + (1._sp,0._sp), (0._sp,1._sp)& + ,(0._sp,-1._sp), (1._sp,0._sp)]& ,[ size(x2, 1), size(x2, 1)])& ) < sptol)& , 'csp check 8') + call check( all(abs(corr(x2, 1, mask = aimag(x2) < 1000) - corr(x2, 1))& + < sptol)& + , 'csp check 9') + + call check( all(abs(corr(x2, 2, mask = aimag(x2) < 1000) - corr(x2, 2))& + < sptol)& + , 'csp check 10') + end subroutine test_csp subroutine test_cdp(x, x2) @@ -327,12 +368,20 @@ subroutine test_cdp(x, x2) , 'cdp check 7') call check( all( abs( corr(x2, 2, mask = aimag(x2) < 6) - reshape([& - (1._dp,0._dp), (0._dp,-1._dp)& - ,(0._dp,1._dp), (1._dp,0._dp)]& + (1._dp,0._dp), (0._dp,1._dp)& + ,(0._dp,-1._dp), (1._dp,0._dp)]& ,[ size(x2, 1), size(x2, 1)])& ) < dptol)& , 'cdp check 8') + call check( all(abs(corr(x2, 1, mask = aimag(x2) < 1000) - corr(x2, 1))& + < sptol)& + , 'csp check 9') + + call check( all(abs(corr(x2, 2, mask = aimag(x2) < 1000) - corr(x2, 2))& + < sptol)& + , 'csp check 10') + end subroutine test_cdp end program test_corr From 918911bd1edcee02656f83c7122dfb8bd0c4a309 Mon Sep 17 00:00:00 2001 From: "Vandenplas, Jeremie" Date: Thu, 28 May 2020 20:38:00 +0200 Subject: [PATCH 4/7] corr:rebase and addition of links --- doc/specs/stdlib_experimental_stats.md | 2 +- src/stdlib_experimental_stats.fypp | 10 ++++++---- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/doc/specs/stdlib_experimental_stats.md b/doc/specs/stdlib_experimental_stats.md index bff1aa533..e22921d39 100644 --- a/doc/specs/stdlib_experimental_stats.md +++ b/doc/specs/stdlib_experimental_stats.md @@ -20,7 +20,7 @@ The Pearson correlation between two rows (or columns), say `x` and `y`, of `arra ### Syntax -`result = corr(array, dim [, mask])` +`result = [[stdlib_experimental_stats(module):corr(interface)]](array, dim [, mask])` ### Arguments diff --git a/src/stdlib_experimental_stats.fypp b/src/stdlib_experimental_stats.fypp index a6400a1f8..f3632a8f0 100644 --- a/src/stdlib_experimental_stats.fypp +++ b/src/stdlib_experimental_stats.fypp @@ -15,6 +15,8 @@ module stdlib_experimental_stats interface corr + !! Pearson correlation of array elements + !! ([Specification](../page/specs/stdlib_experimental_stats.html#description)) #:for k1, t1 in RC_KINDS_TYPES #:set RName = rname("corr",1, t1, k1) module function ${RName}$(x, dim, mask) result(res) @@ -104,7 +106,7 @@ module stdlib_experimental_stats interface cov !! Covariance of array elements - !! ([Specification](../page/specs/stdlib_experimental_stats.html#description)) + !! ([Specification](../page/specs/stdlib_experimental_stats.html#description_1)) #:for k1, t1 in RC_KINDS_TYPES #:set RName = rname("cov",1, t1, k1) module function ${RName}$(x, dim, mask, corrected) result(res) @@ -201,7 +203,7 @@ module stdlib_experimental_stats interface mean !! Mean of array elements - !! ([Specification](../page/specs/stdlib_experimental_stats.html#description_1)) + !! ([Specification](../page/specs/stdlib_experimental_stats.html#description_2)) #:for k1, t1 in RC_KINDS_TYPES #:for rank in RANKS #:set RName = rname("mean_all",rank, t1, k1) @@ -299,7 +301,7 @@ module stdlib_experimental_stats interface var !! Variance of array elements - !! ([Specification](../page/specs/stdlib_experimental_stats.html#description_3)) + !! ([Specification](../page/specs/stdlib_experimental_stats.html#description_4)) #:for k1, t1 in RC_KINDS_TYPES #:for rank in RANKS @@ -406,7 +408,7 @@ module stdlib_experimental_stats interface moment !! Central moment of array elements - !! ([Specification](../page/specs/stdlib_experimental_stats.html#description_2)) + !! ([Specification](../page/specs/stdlib_experimental_stats.html#description_3)) #:for k1, t1 in RC_KINDS_TYPES #:for rank in RANKS #:set RName = rname("moment_all",rank, t1, k1) From 42084338bc43d99c6f6bb4e6df02d6c02790c430 Mon Sep 17 00:00:00 2001 From: "Vandenplas, Jeremie" Date: Fri, 5 Jun 2020 07:29:25 +0200 Subject: [PATCH 5/7] corr: add kind to cmplx --- src/tests/stats/test_corr.f90 | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/src/tests/stats/test_corr.f90 b/src/tests/stats/test_corr.f90 index e6e77b59b..a7ff0bd31 100644 --- a/src/tests/stats/test_corr.f90 +++ b/src/tests/stats/test_corr.f90 @@ -14,17 +14,17 @@ program test_corr 3._dp, 4._dp, 6._dp, 20._dp,& 15._dp, 14._dp, 13._dp, 12._dp], [4, 3]) - complex(dp) :: cd1(5) = [ cmplx(0.57706_dp, 0.00000_dp),& - cmplx(0.00000_dp, 1.44065_dp),& - cmplx(1.26401_dp, 0.00000_dp),& - cmplx(0.00000_dp, 0.88833_dp),& - cmplx(1.14352_dp, 0.00000_dp)] - complex(dp) :: ds(2,3) = reshape([ cmplx(1._dp, 0._dp),& - cmplx(0._dp, 2._dp),& - cmplx(3._dp, 0._dp),& - cmplx(0._dp, 4._dp),& - cmplx(5._dp, 0._dp),& - cmplx(0._dp, 6._dp)], [2, 3]) + complex(dp) :: cd1(5) = [ cmplx(0.57706_dp, 0.00000_dp, kind = dp),& + cmplx(0.00000_dp, 1.44065_dp, kind = dp),& + cmplx(1.26401_dp, 0.00000_dp, kind = dp),& + cmplx(0.00000_dp, 0.88833_dp, kind = dp),& + cmplx(1.14352_dp, 0.00000_dp, kind = dp)] + complex(dp) :: ds(2,3) = reshape([ cmplx(1._dp, 0._dp, kind = dp),& + cmplx(0._dp, 2._dp, kind = dp),& + cmplx(3._dp, 0._dp, kind = dp),& + cmplx(0._dp, 4._dp, kind = dp),& + cmplx(5._dp, 0._dp, kind = dp),& + cmplx(0._dp, 6._dp, kind = dp)], [2, 3]) call test_sp(real(d1, sp), real(d, sp)) From d493c7692e67b69e48edf79c61571f9f2edd0050 Mon Sep 17 00:00:00 2001 From: Jeremie Vandenplas Date: Fri, 5 Jun 2020 18:24:07 +0200 Subject: [PATCH 6/7] Update doc/specs/stdlib_experimental_stats.md Co-authored-by: Milan Curcic --- doc/specs/stdlib_experimental_stats.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/doc/specs/stdlib_experimental_stats.md b/doc/specs/stdlib_experimental_stats.md index e22921d39..69e70d909 100644 --- a/doc/specs/stdlib_experimental_stats.md +++ b/doc/specs/stdlib_experimental_stats.md @@ -32,7 +32,7 @@ The Pearson correlation between two rows (or columns), say `x` and `y`, of `arra ### Return value -If `array` is of rank 1 and of type `real` or `complex`, the result is of type `real` corresponding to the type of `array`. +If `array` is of rank 1 and of type `real` or `complex`, the result is of type `real` and has the same kind as `array`. If `array` is of rank 2 and of type `real` or `complex`, the result is of the same type as `array`. If `array` is of type `integer`, the result is of type `real(dp)`. @@ -281,4 +281,3 @@ program demo_var print *, var(y, 1, y > 3., corrected=.false.) !returns [NaN, 0., 0.25] end program demo_var ``` - From c833cbc4d65623dc0fb6a5ac98709400ded40bc2 Mon Sep 17 00:00:00 2001 From: Jeremie Vandenplas Date: Fri, 5 Jun 2020 18:25:06 +0200 Subject: [PATCH 7/7] Update doc/specs/stdlib_experimental_stats.md Co-authored-by: Milan Curcic --- doc/specs/stdlib_experimental_stats.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/specs/stdlib_experimental_stats.md b/doc/specs/stdlib_experimental_stats.md index 69e70d909..f7bf46356 100644 --- a/doc/specs/stdlib_experimental_stats.md +++ b/doc/specs/stdlib_experimental_stats.md @@ -33,7 +33,7 @@ The Pearson correlation between two rows (or columns), say `x` and `y`, of `arra ### Return value If `array` is of rank 1 and of type `real` or `complex`, the result is of type `real` and has the same kind as `array`. -If `array` is of rank 2 and of type `real` or `complex`, the result is of the same type as `array`. +If `array` is of rank 2 and of type `real` or `complex`, the result is of the same type and kind as `array`. If `array` is of type `integer`, the result is of type `real(dp)`. If `array` is of rank 1 and of size larger than 1, a scalar equal to 1 is returned. Otherwise, IEEE `NaN` is returned.