Skip to content

Commit bbf8e43

Browse files
authored
Merge pull request #961 from Madhav2310/stats4
Covariance and Correlation functions implemented
2 parents 148cae5 + 963360b commit bbf8e43

File tree

2 files changed

+151
-8
lines changed

2 files changed

+151
-8
lines changed

integration_tests/test_statistics.py

Lines changed: 43 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
from statistics import (mean, fmean, geometric_mean, harmonic_mean,
2-
variance, stdev)
2+
variance, stdev, covariance, correlation)
33
from ltypes import i32, f64, i64
44

55
eps: f64
@@ -66,6 +66,46 @@ def test_variance():
6666
k = variance(b)
6767
assert abs(k - 0.40924) < eps
6868

69+
def test_covariance():
    """
    Checks covariance() against reference values, once for a pair of
    integer lists and once for a pair of floating-point lists.
    """
    # Integer samples: the expected covariance is 0.75.
    xs_int: list[i32]
    ys_int: list[i32]
    xs_int = [1, 2, 3, 4, 5, 6, 7, 8, 9]
    ys_int = [1, 2, 3, 1, 2, 3, 1, 2, 3]
    cov_int: f64
    cov_int = covariance(xs_int, ys_int)
    assert abs(cov_int - 0.75) < eps

    # Floating-point samples: the expected covariance is negative, hence
    # the `+` in the comparison below.
    xs_real: list[f64]
    ys_real: list[f64]
    xs_real = [2.74, 1.23, 2.63, 2.22, 3.0, 1.98]
    ys_real = [9.4, 1.23, 2.63, 22.4, 1.9, 13.98]
    cov_real: f64
    cov_real = covariance(xs_real, ys_real)
    assert abs(cov_real + 0.24955999999999934) < eps
85+
86+
def test_correlation():
    """
    Checks correlation() on three cases: a weakly correlated pair of
    integer lists, a list against itself (coefficient 1), and a list
    against its reverse (coefficient -1).
    """
    a: list[i32]
    a = [11, 2, 7, 4, 15, 6, 10, 8, 9, 1, 11, 5, 13, 6, 15]
    b: list[i32]
    b = [2, 5, 17, 6, 10, 8, 13, 4, 6, 9, 11, 2, 5, 4, 7]

    weak: f64
    weak = correlation(a, b)
    assert abs(weak - 0.11521487988958108) < eps

    c: list[i32]
    c = [1, 2, 3, 4, 5, 6, 7, 8, 9]
    d: list[i32]
    d = [9, 8, 7, 6, 5, 4, 3, 2, 1]

    # The coefficient is produced by a sqrt and a division, so compare with
    # the module tolerance instead of exact float equality; this matches
    # the other assertions in this file.
    perfect: f64
    perfect = correlation(c, c)
    assert abs(perfect - 1.0) < eps

    inverse: f64
    inverse = correlation(c, d)
    assert abs(inverse + 1.0) < eps
108+
69109

70110
def test_stdev():
71111
a: list[i32]
@@ -88,5 +128,7 @@ def check():
88128
test_fmean()
89129
test_variance()
90130
test_stdev()
131+
test_covariance()
132+
test_correlation()
91133

92134
check()

src/runtime/statistics.py

Lines changed: 108 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
from ltypes import i32, f64, overload
2-
2+
from math import sqrt
33

44

55
@overload
@@ -117,7 +117,7 @@ def geometric_mean(x: list[i32]) -> f64:
117117
for i in range(k):
118118
product *= float(x[i])
119119

120-
return product**(1/k)
120+
return product ** (1 / k)
121121

122122

123123
def harmonic_mean(x: list[i32]) -> f64:
@@ -136,7 +136,7 @@ def harmonic_mean(x: list[i32]) -> f64:
136136
return 0.0
137137
sum += 1 / x[i]
138138

139-
return k/sum
139+
return k / sum
140140

141141
@overload
142142
def variance(x: list[f64]) -> f64:
@@ -153,8 +153,8 @@ def variance(x: list[f64]) -> f64:
153153
num = 0.0
154154
i: i32
155155
for i in range(n):
156-
num += (x[i]-xmean)**2
157-
return num/(n-1)
156+
num += (x[i] - xmean)**2
157+
return num / (n-1)
158158

159159
@overload
160160
def variance(x: list[i32]) -> f64:
@@ -171,8 +171,8 @@ def variance(x: list[i32]) -> f64:
171171
num = 0.0
172172
i: i32
173173
for i in range(n):
174-
num += (x[i]-xmean)**2
175-
return num/(n-1)
174+
num += (x[i] - xmean)**2
175+
return num / (n-1)
176176

177177

178178
@overload
@@ -188,3 +188,104 @@ def stdev(x: list[i32]) -> f64:
188188
Returns the standard deviation of a data sequence of numbers
189189
"""
190190
return variance(x)**0.5
191+
192+
@overload
def covariance(x: list[i32], y: list[i32]) -> f64:
    """
    Returns the sample covariance of two integer data sequences.

    The sum of the products of the deviations from each mean is divided
    by (number of points - 1). An Exception is raised when the inputs
    differ in length or hold fewer than two data points.
    """
    size_x: i32 = len(x)
    size_y: i32 = len(y)
    # One guard covers both failure modes: unequal lengths and too few points.
    if size_x != size_y or size_x < 2 or size_y < 2:
        raise Exception("Both inputs must be of the same length (no less than two)")
    mean_x: f64 = mean(x)
    mean_y: f64 = mean(y)
    total: f64 = 0.0
    idx: i32
    for idx in range(size_x):
        total += (x[idx] - mean_x) * (y[idx] - mean_y)
    return total / (size_x - 1)
209+
210+
@overload
def covariance(x: list[f64], y: list[f64]) -> f64:
    """
    Returns the sample covariance of two floating-point data sequences.

    The sum of the products of the deviations from each mean is divided
    by (number of points - 1). An Exception is raised when the inputs
    differ in length or hold fewer than two data points.
    """
    size_x: i32 = len(x)
    size_y: i32 = len(y)
    # One guard covers both failure modes: unequal lengths and too few points.
    if size_x != size_y or size_x < 2 or size_y < 2:
        raise Exception("Both inputs must be of the same length (no less than two)")
    mean_x: f64 = mean(x)
    mean_y: f64 = mean(y)
    total: f64 = 0.0
    idx: i32
    for idx in range(size_x):
        total += (x[idx] - mean_x) * (y[idx] - mean_y)
    return total / (size_x - 1)
227+
228+
@overload
def correlation(x: list[i32], y: list[i32]) -> f64:
    """
    Return the Pearson's correlation coefficient for two inputs.

    The coefficient is sxy / sqrt(sxx * syy), where sxy is the sum of the
    products of the deviations from each mean and sxx / syy are the sums
    of the squared deviations of x and y.

    Raises an Exception when the inputs differ in length, when fewer than
    two data points are given, or when sqrt(sxx * syy) is zero (at least
    one of the inputs is constant).
    """
    n: i32 = len(x)
    m: i32 = len(y)
    if n != m:
        raise Exception("correlation requires that both inputs have same number of data points")
    if n < 2:
        raise Exception("correlation requires at least two data points")
    xmean: f64 = mean(x)
    ymean: f64 = mean(y)

    # Accumulate all three sums in a single pass; each accumulator still
    # receives its terms in index order, so the results are unchanged.
    sxy: f64 = 0.0
    sxx: f64 = 0.0
    syy: f64 = 0.0
    i: i32
    for i in range(n):
        sxy += (x[i] - xmean) * (y[i] - ymean)
        sxx += (x[i] - xmean) ** 2
        syy += (y[i] - ymean) ** 2

    # Evaluate the square root once and reuse it for the guard and the result.
    denom: f64 = sqrt(sxx * syy)
    if denom == 0.0:
        raise Exception('at least one of the inputs is constant')
    return sxy / denom
259+
260+
@overload
def correlation(x: list[f64], y: list[f64]) -> f64:
    """
    Return the Pearson's correlation coefficient for two inputs.

    The coefficient is sxy / sqrt(sxx * syy), where sxy is the sum of the
    products of the deviations from each mean and sxx / syy are the sums
    of the squared deviations of x and y.

    Raises an Exception when the inputs differ in length, when fewer than
    two data points are given, or when sqrt(sxx * syy) is zero (at least
    one of the inputs is constant).
    """
    n: i32 = len(x)
    m: i32 = len(y)
    if n != m:
        raise Exception("correlation requires that both inputs have same number of data points")
    if n < 2:
        raise Exception("correlation requires at least two data points")
    xmean: f64 = mean(x)
    ymean: f64 = mean(y)

    # Accumulate all three sums in a single pass; each accumulator still
    # receives its terms in index order, so the results are unchanged.
    sxy: f64 = 0.0
    sxx: f64 = 0.0
    syy: f64 = 0.0
    i: i32
    for i in range(n):
        sxy += (x[i] - xmean) * (y[i] - ymean)
        sxx += (x[i] - xmean) ** 2
        syy += (y[i] - ymean) ** 2

    # Evaluate the square root once and reuse it for the guard and the result.
    denom: f64 = sqrt(sxx * syy)
    if denom == 0.0:
        raise Exception('at least one of the inputs is constant')
    return sxy / denom
291+

0 commit comments

Comments
 (0)