Add moments for CAR distribution #5220

zoj613 · 2021-11-22T18:06:27Z

This adds moments for the CAR distribution discussed in #5078 (comment)

zoj613 · 2021-11-22T18:16:13Z

The tests are based on

pymc/pymc/tests/test_distributions_random.py

Lines 2369 to 2381 in b8522dc

    
           def test_car_rng_fn(sparse): 
        
               delta = 0.05  # limit for KS p-value 
        
               n_fails = 20  # Allows the KS fails a certain number of times 
        
               size = (100,) 
        
               W = np.array( 
        
                   [[0.0, 1.0, 1.0, 0.0], [1.0, 0.0, 0.0, 1.0], [1.0, 0.0, 0.0, 1.0], [0.0, 1.0, 1.0, 0.0]] 
        
               ) 
        
               tau = 2 
        
               alpha = 0.5 
        
               mu = np.array([1, 1, 1, 1])

@ckrapu @aerubanov @ricardoV94 Could you please take a look at this when you can.

codecov · 2021-11-22T18:23:54Z

Codecov Report

Merging #5220 (260586b) into main (b6f76e5) will increase coverage by 0.00%.
The diff coverage is 100.00%.

@@           Coverage Diff           @@
##             main    #5220   +/-   ##
=======================================
  Coverage   79.02%   79.02%           
=======================================
  Files          87       87           
  Lines       14376    14382    +6     
=======================================
+ Hits        11360    11366    +6     
  Misses       3016     3016

Impacted Files	Coverage Δ
pymc/distributions/multivariate.py	`73.98% <100.00%> (+0.22%)`	⬆️

zoj613 · 2021-11-22T22:08:24Z

Not sure why the coverage tanked here. I did not make any changes to parallel_sampling.py.

ricardoV94 · 2021-11-25T12:41:13Z

pymc/tests/test_distributions_moments.py

+    tau = 2
+    alpha = 0.5


Are tau and alpha constrained to be scalars?

Just noticed the class docstring allows them to be arrays. I just don't see it being tested anywhere in

pymc/pymc/tests/test_distributions.py

Lines 3162 to 3242 in 99ec0ff

@pytest.mark.parametrize(

"sparse, size",

[(False, ()), (False, (1,)), (False, (4,)), (False, (4, 4, 4)), (True, ()), (True, (4,))],

ids=str,

)

def test_car_logp(sparse, size):

"""

Tests the log probability function for the CAR distribution by checking

against Scipy's multivariate normal logpdf, up to an additive constant.

The formula used by the CAR logp implementation omits several additive terms.

"""

np.random.seed(1)

# d x d adjacency matrix for a square (d=4) of rook-adjacent sites

W = np.array(

[[0.0, 1.0, 1.0, 0.0], [1.0, 0.0, 0.0, 1.0], [1.0, 0.0, 0.0, 1.0], [0.0, 1.0, 1.0, 0.0]]

)

tau = 2

alpha = 0.5

mu = np.zeros(4)

xs = np.random.randn(*(size + mu.shape))

# Compute CAR covariance matrix and resulting MVN logp

D = W.sum(axis=0)

prec = tau * (np.diag(D) - alpha * W)

cov = np.linalg.inv(prec)

scipy_logp = scipy.stats.multivariate_normal.logpdf(xs, mu, cov)

W = aesara.tensor.as_tensor_variable(W)

if sparse:

W = aesara.sparse.csr_from_dense(W)

car_dist = CAR.dist(mu, W, alpha, tau, size=size)

car_logp = logp(car_dist, xs).eval()

# Check to make sure that the CAR and MVN log PDFs are equivalent

# up to an additive constant which is independent of the CAR parameters

delta_logp = scipy_logp - car_logp

# Check to make sure all the delta values are identical.

tol = 1e-08

if aesara.config.floatX == "float32":

tol = 1e-5

assert np.allclose(delta_logp - delta_logp[0], 0.0, atol=tol)

@pytest.mark.parametrize(

"sparse",

[False, True],

ids=str,

)

def test_car_matrix_check(sparse):

"""

Tests the check of W matrix symmetry in CARRV.make_node.

"""

np.random.seed(1)

tau = 2

alpha = 0.5

mu = np.zeros(4)

xs = np.random.randn(*mu.shape)

# non-symmetric matrix

W = np.array(

[[0.0, 1.0, 2.0, 0.0], [1.0, 0.0, 0.0, 1.0], [1.0, 0.0, 0.0, 1.0], [0.0, 1.0, 1.0, 0.0]]

)

W = aesara.tensor.as_tensor_variable(W)

if sparse:

W = aesara.sparse.csr_from_dense(W)

car_dist = CAR.dist(mu, W, alpha, tau)

with pytest.raises(AssertionError, match="W must be a symmetric adjacency matrix"):

logp(car_dist, xs).eval()

# W.ndim != 2

if not sparse:

W = np.array([0.0, 1.0, 2.0, 0.0])

W = aesara.tensor.as_tensor_variable(W)

with pytest.raises(ValueError, match="W must be a matrix"):

car_dist = CAR.dist(mu, W, alpha, tau)

and

pymc/pymc/tests/test_distributions_random.py

Lines 2368 to 2405 in 99ec0ff

@pytest.mark.parametrize("sparse", [True, False])

def test_car_rng_fn(sparse):

delta = 0.05 # limit for KS p-value

n_fails = 20 # Allows the KS fails a certain number of times

size = (100,)

W = np.array(

[[0.0, 1.0, 1.0, 0.0], [1.0, 0.0, 0.0, 1.0], [1.0, 0.0, 0.0, 1.0], [0.0, 1.0, 1.0, 0.0]]

)

tau = 2

alpha = 0.5

mu = np.array([1, 1, 1, 1])

D = W.sum(axis=0)

prec = tau * (np.diag(D) - alpha * W)

cov = np.linalg.inv(prec)

W = aesara.tensor.as_tensor_variable(W)

if sparse:

W = aesara.sparse.csr_from_dense(W)

with pm.Model(rng_seeder=1):

car = pm.CAR("car", mu, W, alpha, tau, size=size)

mn = pm.MvNormal("mn", mu, cov, size=size)

check = pm.sample_prior_predictive(n_fails, return_inferencedata=False)

p, f = delta, n_fails

while p <= delta and f > 0:

car_smp, mn_smp = check["car"][f - 1, :, :], check["mn"][f - 1, :, :]

p = min(

st.ks_2samp(

np.atleast_1d(car_smp[..., idx]).flatten(),

np.atleast_1d(mn_smp[..., idx]).flatten(),

)[1]

for idx in range(car_smp.shape[-1])

)

f -= 1

assert p > delta

which is what this PR's tests are based on. I also don't remember any literature that treats tau as anything more than a scalar.

If the random/logp methods work with non-scalar inputs for those parameters, we need get_moment to do the same. If they are not really supposed to work with those being non-scalar we don't need to, but we might have to open a new issue similar to #5214

#5214 was fixed in #5241 - the changeset was rather simple. @zoj613 do you want to add the enforcement of scalar tau and alpha, or add them as vectors to the test cases?

According to

pymc/pymc/distributions/multivariate.py

Line 1999 in b6f76e5

ndims_params = [1, 2, 0, 0]

it looks like tau and alpha are both scalars. The docstring seems wrong about allowing array input for those parameters.

ndims_params indicates the minimum/base dimensions; it does not mean the parameters can't be of higher dimensionality. Ideally we wouldn't impose any limitations, but the rng_fn/logp methods might not properly broadcast/handle inputs with more dimensions than the minimum case.

@ckrapu is the tau parameter meant to handle array input? I have not seen that case in literature. I also do not see that case being tested in the implementation unittests. I'm asking because it appears you submitted the PR for the distribution. Would you be able to clarify if we need to account for non-scalar tau?

@zoj613 I think it's safe to go ahead and add that explicit limitation for the time being. We can always lift it later (and add tests). Do you want to include that in this PR?

@ricardoV94 I think it's best that array tau be handled as a separate PR. I believe there is no good motivation to account for array tau given its lack of application in literature. IIRC the paper linked in the doc-string only mentions tau as an array in the case of an MCAR model, which I believe is different from what is implemented by CAR.

Lack of application in the literature shouldn't be a reason to restrict it; what matters is whether we are handling it properly (and have tests to give us confidence we are). I agree it can be done in a separate PR, would you be interested in doing that? I'll go ahead and merge this one after one last review

mjhajharia · 2022-01-03T14:17:48Z

@zoj613 any updates on this?!

zoj613 · 2022-01-03T22:34:59Z

@zoj613 any updates on this?!

The tests pass, however there is still ambiguity about allowing non-scalar input for the tau parameter. The docstring suggests array input as acceptable, but that case is not tested anywhere in the codebase.

twiecki · 2022-01-07T09:57:27Z

@zoj613 Can you add a test for that case?

zoj613 force-pushed the carmoments branch from 98f8eb2 to 5524bad Compare November 22, 2021 18:17

twiecki requested a review from ricardoV94 November 23, 2021 08:15

ricardoV94 reviewed Nov 25, 2021

View reviewed changes

ricardoV94 mentioned this pull request Nov 25, 2021

Bring back distribution moments #5078

Closed

51 tasks

michaelosthege added this to the v4.0.0-beta1 (vNext) milestone Nov 26, 2021

ricardoV94 modified the milestones: v4.0.0-beta1 (vNext), v4.0.0-beta2 Dec 6, 2021

zoj613 force-pushed the carmoments branch from 5524bad to c90d671 Compare January 3, 2022 21:18

Add moments for CAR distribution

260586b

zoj613 force-pushed the carmoments branch from c90d671 to 260586b Compare January 3, 2022 21:46

ricardoV94 modified the milestones: v4.0.0b2, v4.0.0b3 Jan 7, 2022

ricardoV94 approved these changes Jan 26, 2022

View reviewed changes

ricardoV94 merged commit e0592ec into pymc-devs:main Jan 26, 2022

	@pytest.mark.parametrize(
	"sparse, size",
	[(False, ()), (False, (1,)), (False, (4,)), (False, (4, 4, 4)), (True, ()), (True, (4,))],
	ids=str,
	)
	def test_car_logp(sparse, size):
	"""
	Tests the log probability function for the CAR distribution by checking
	against Scipy's multivariate normal logpdf, up to an additive constant.
	The formula used by the CAR logp implementation omits several additive terms.
	"""
	np.random.seed(1)

	# d x d adjacency matrix for a square (d=4) of rook-adjacent sites
	W = np.array(
	[[0.0, 1.0, 1.0, 0.0], [1.0, 0.0, 0.0, 1.0], [1.0, 0.0, 0.0, 1.0], [0.0, 1.0, 1.0, 0.0]]
	)

	tau = 2
	alpha = 0.5
	mu = np.zeros(4)

	xs = np.random.randn(*(size + mu.shape))

	# Compute CAR covariance matrix and resulting MVN logp
	D = W.sum(axis=0)
	prec = tau * (np.diag(D) - alpha * W)
	cov = np.linalg.inv(prec)
	scipy_logp = scipy.stats.multivariate_normal.logpdf(xs, mu, cov)

	W = aesara.tensor.as_tensor_variable(W)
	if sparse:
	W = aesara.sparse.csr_from_dense(W)

	car_dist = CAR.dist(mu, W, alpha, tau, size=size)
	car_logp = logp(car_dist, xs).eval()

	# Check to make sure that the CAR and MVN log PDFs are equivalent
	# up to an additive constant which is independent of the CAR parameters
	delta_logp = scipy_logp - car_logp

	# Check to make sure all the delta values are identical.
	tol = 1e-08
	if aesara.config.floatX == "float32":
	tol = 1e-5
	assert np.allclose(delta_logp - delta_logp[0], 0.0, atol=tol)


	@pytest.mark.parametrize(
	"sparse",
	[False, True],
	ids=str,
	)
	def test_car_matrix_check(sparse):
	"""
	Tests the check of W matrix symmetry in CARRV.make_node.
	"""
	np.random.seed(1)
	tau = 2
	alpha = 0.5
	mu = np.zeros(4)
	xs = np.random.randn(*mu.shape)

	# non-symmetric matrix
	W = np.array(
	[[0.0, 1.0, 2.0, 0.0], [1.0, 0.0, 0.0, 1.0], [1.0, 0.0, 0.0, 1.0], [0.0, 1.0, 1.0, 0.0]]
	)
	W = aesara.tensor.as_tensor_variable(W)
	if sparse:
	W = aesara.sparse.csr_from_dense(W)

	car_dist = CAR.dist(mu, W, alpha, tau)
	with pytest.raises(AssertionError, match="W must be a symmetric adjacency matrix"):
	logp(car_dist, xs).eval()

	# W.ndim != 2
	if not sparse:
	W = np.array([0.0, 1.0, 2.0, 0.0])
	W = aesara.tensor.as_tensor_variable(W)
	with pytest.raises(ValueError, match="W must be a matrix"):
	car_dist = CAR.dist(mu, W, alpha, tau)

	@pytest.mark.parametrize("sparse", [True, False])
	def test_car_rng_fn(sparse):
	delta = 0.05 # limit for KS p-value
	n_fails = 20 # Allows the KS fails a certain number of times
	size = (100,)

	W = np.array(
	[[0.0, 1.0, 1.0, 0.0], [1.0, 0.0, 0.0, 1.0], [1.0, 0.0, 0.0, 1.0], [0.0, 1.0, 1.0, 0.0]]
	)

	tau = 2
	alpha = 0.5
	mu = np.array([1, 1, 1, 1])

	D = W.sum(axis=0)
	prec = tau * (np.diag(D) - alpha * W)
	cov = np.linalg.inv(prec)
	W = aesara.tensor.as_tensor_variable(W)
	if sparse:
	W = aesara.sparse.csr_from_dense(W)

	with pm.Model(rng_seeder=1):
	car = pm.CAR("car", mu, W, alpha, tau, size=size)
	mn = pm.MvNormal("mn", mu, cov, size=size)
	check = pm.sample_prior_predictive(n_fails, return_inferencedata=False)

	p, f = delta, n_fails
	while p <= delta and f > 0:
	car_smp, mn_smp = check["car"][f - 1, :, :], check["mn"][f - 1, :, :]
	p = min(
	st.ks_2samp(
	np.atleast_1d(car_smp[..., idx]).flatten(),
	np.atleast_1d(mn_smp[..., idx]).flatten(),
	)[1]
	for idx in range(car_smp.shape[-1])
	)
	f -= 1
	assert p > delta

Uh oh!

Add moments for CAR distribution #5220

Add moments for CAR distribution #5220

Uh oh!

Conversation

zoj613 commented Nov 22, 2021

Uh oh!

zoj613 commented Nov 22, 2021

Uh oh!

codecov bot commented Nov 22, 2021 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Codecov Report

Uh oh!

zoj613 commented Nov 22, 2021

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

zoj613 Jan 26, 2022 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Choose a reason for hiding this comment

Uh oh!

ricardoV94 Jan 26, 2022 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Choose a reason for hiding this comment

Uh oh!

mjhajharia commented Jan 3, 2022

Uh oh!

zoj613 commented Jan 3, 2022

Uh oh!

twiecki commented Jan 7, 2022

Uh oh!

Uh oh!

codecov bot commented Nov 22, 2021 •

edited

Loading

zoj613 Jan 26, 2022 •

edited

Loading

ricardoV94 Jan 26, 2022 •

edited

Loading