Skip to content

inference fails with ValueError indicating wrong dimensions after set()ing the data #4029

@Robert-Muil

Description

@Robert-Muil

Description of your problem

Attempting to re-run inference on a model after using pm.set_data() to change the data causes a ValueError.
Running sample_posterior_predictive and other such methods works fine.

Verified with @twiecki that this is likely to be a bug, not just user error. Happy to help diagnose if given instructions.
The error here is:

ValueError: Elemwise{sub,no_inplace}.grad returned object of shape (70,) as gradient term on input 0 of shape (100,)

However, have also seen the following if 2 Data instances are used in the model (with the same length), and both changed:

ValueError: Input dimension mis-match. (input[0].shape[0] = 70, input[1].shape[0] = 100)

Please provide a minimal, self-contained, and reproducible example.

# Minimal reproduction: calling pm.sample() a second time after pm.set_data()
# has changed the *length* of the observed data raises a ValueError.
import pymc3 as pm
import numpy as np

# create actual system, very simple: just a constant (y) with noise.
y = 15
y_obs = y + (1.23 * np.random.randn(100))

# First, fit with partial data 
with pm.Model() as m:
    # pm.Data wraps the array in a shared variable so it can be swapped
    # later via pm.set_data(); only the first 70 points are used here.
    y_obs_data = pm.Data('y_obs', y_obs[0:70])
    assert len(m['y_obs'].get_value()) == 70

    # Prior over the unknown constant being estimated.
    y_est = pm.Normal('y_est', sigma=45.67)

    # Likelihood: observations tied to the mutable data container.
    pm.Normal('obs', mu=y_est, sigma=1.23, observed=y_obs_data)
    
    prior_pred = pm.sample_prior_predictive()
    
    # This first sampling run succeeds (data length is still 70).
    trace = pm.sample()
    
# Now attempt to extend data out to the full length, and run inference again:
# this fails with a ValueError
with m:
    # Swap in the full 100-element dataset. The shared variable itself
    # updates (assert below passes), but sampling then fails — presumably
    # a stale shape cached in the gradient graph; see traceback below.
    pm.set_data({'y_obs': y_obs})
    assert len(m['y_obs'].get_value()) == 100
    
    # Fails: Elemwise{sub,no_inplace}.grad returns a (70,) gradient term
    # for an input of shape (100,).
    trace = pm.sample()

Please provide the full traceback.

Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [y_est]

 100.00% [8000/8000 00:01<00:00 Sampling 4 chains, 0 divergences]
Sampling 4 chains for 1_000 tune and 1_000 draw iterations (4_000 + 4_000 draws total) took 2 seconds.
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-1-f318a9058143> in <module>
     27 
     28     # Inference
---> 29     trace = pm.sample()

/usr/local/anaconda3/envs/bayes_course_2020/lib/python3.7/site-packages/pymc3/sampling.py in sample(draws, step, init, n_init, start, trace, chain_idx, chains, cores, tune, progressbar, model, random_seed, discard_tuned_samples, compute_convergence_checks, callback, return_inferencedata, idata_kwargs, **kwargs)
    471                 random_seed=random_seed,
    472                 progressbar=progressbar,
--> 473                 **kwargs
    474             )
    475             if start is None:

/usr/local/anaconda3/envs/bayes_course_2020/lib/python3.7/site-packages/pymc3/sampling.py in init_nuts(init, chains, n_init, model, random_seed, progressbar, **kwargs)
   2051         raise ValueError("Unknown initializer: {}.".format(init))
   2052 
-> 2053     step = pm.NUTS(potential=potential, model=model, **kwargs)
   2054 
   2055     return start, step

/usr/local/anaconda3/envs/bayes_course_2020/lib/python3.7/site-packages/pymc3/step_methods/hmc/nuts.py in __init__(self, vars, max_treedepth, early_max_treedepth, **kwargs)
    156         `pm.sample` to the desired number of tuning steps.
    157         """
--> 158         super().__init__(vars, **kwargs)
    159 
    160         self.max_treedepth = max_treedepth

/usr/local/anaconda3/envs/bayes_course_2020/lib/python3.7/site-packages/pymc3/step_methods/hmc/base_hmc.py in __init__(self, vars, scaling, step_scale, is_cov, model, blocked, potential, dtype, Emax, target_accept, gamma, k, t0, adapt_step_size, step_rand, **theano_kwargs)
     84         vars = inputvars(vars)
     85 
---> 86         super().__init__(vars, blocked=blocked, model=model, dtype=dtype, **theano_kwargs)
     87 
     88         self.adapt_step_size = adapt_step_size

/usr/local/anaconda3/envs/bayes_course_2020/lib/python3.7/site-packages/pymc3/step_methods/arraystep.py in __init__(self, vars, model, blocked, dtype, **theano_kwargs)
    242 
    243         func = model.logp_dlogp_function(
--> 244             vars, dtype=dtype, **theano_kwargs)
    245 
    246         # handle edge case discovered in #2948

/usr/local/anaconda3/envs/bayes_course_2020/lib/python3.7/site-packages/pymc3/model.py in logp_dlogp_function(self, grad_vars, **kwargs)
    933         varnames = [var.name for var in grad_vars]
    934         extra_vars = [var for var in self.free_RVs if var.name not in varnames]
--> 935         return ValueGradFunction(self.logpt, grad_vars, extra_vars, **kwargs)
    936 
    937     @property

/usr/local/anaconda3/envs/bayes_course_2020/lib/python3.7/site-packages/pymc3/model.py in __init__(self, cost, grad_vars, extra_vars, dtype, casting, **kwargs)
    647         )
    648 
--> 649         grad = tt.grad(self._cost_joined, self._vars_joined)
    650         grad.name = "__grad"
    651 

/usr/local/anaconda3/envs/bayes_course_2020/lib/python3.7/site-packages/theano/gradient.py in grad(cost, wrt, consider_constant, disconnected_inputs, add_names, known_grads, return_disconnected, null_gradients)
    603 
    604     rval = _populate_grad_dict(var_to_app_to_idx,
--> 605                                grad_dict, wrt, cost_name)
    606 
    607     for i in xrange(len(rval)):

/usr/local/anaconda3/envs/bayes_course_2020/lib/python3.7/site-packages/theano/gradient.py in _populate_grad_dict(var_to_app_to_idx, grad_dict, wrt, cost_name)
   1369         return grad_dict[var]
   1370 
-> 1371     rval = [access_grad_cache(elem) for elem in wrt]
   1372 
   1373     return rval

/usr/local/anaconda3/envs/bayes_course_2020/lib/python3.7/site-packages/theano/gradient.py in <listcomp>(.0)
   1369         return grad_dict[var]
   1370 
-> 1371     rval = [access_grad_cache(elem) for elem in wrt]
   1372 
   1373     return rval

/usr/local/anaconda3/envs/bayes_course_2020/lib/python3.7/site-packages/theano/gradient.py in access_grad_cache(var)
   1324                     for idx in node_to_idx[node]:
   1325 
-> 1326                         term = access_term_cache(node)[idx]
   1327 
   1328                         if not isinstance(term, gof.Variable):

/usr/local/anaconda3/envs/bayes_course_2020/lib/python3.7/site-packages/theano/gradient.py in access_term_cache(node)
   1019             inputs = node.inputs
   1020 
-> 1021             output_grads = [access_grad_cache(var) for var in node.outputs]
   1022 
   1023             # list of bools indicating if each output is connected to the cost

/usr/local/anaconda3/envs/bayes_course_2020/lib/python3.7/site-packages/theano/gradient.py in <listcomp>(.0)
   1019             inputs = node.inputs
   1020 
-> 1021             output_grads = [access_grad_cache(var) for var in node.outputs]
   1022 
   1023             # list of bools indicating if each output is connected to the cost

/usr/local/anaconda3/envs/bayes_course_2020/lib/python3.7/site-packages/theano/gradient.py in access_grad_cache(var)
   1324                     for idx in node_to_idx[node]:
   1325 
-> 1326                         term = access_term_cache(node)[idx]
   1327 
   1328                         if not isinstance(term, gof.Variable):

/usr/local/anaconda3/envs/bayes_course_2020/lib/python3.7/site-packages/theano/gradient.py in access_term_cache(node)
   1019             inputs = node.inputs
   1020 
-> 1021             output_grads = [access_grad_cache(var) for var in node.outputs]
   1022 
   1023             # list of bools indicating if each output is connected to the cost

/usr/local/anaconda3/envs/bayes_course_2020/lib/python3.7/site-packages/theano/gradient.py in <listcomp>(.0)
   1019             inputs = node.inputs
   1020 
-> 1021             output_grads = [access_grad_cache(var) for var in node.outputs]
   1022 
   1023             # list of bools indicating if each output is connected to the cost

/usr/local/anaconda3/envs/bayes_course_2020/lib/python3.7/site-packages/theano/gradient.py in access_grad_cache(var)
   1324                     for idx in node_to_idx[node]:
   1325 
-> 1326                         term = access_term_cache(node)[idx]
   1327 
   1328                         if not isinstance(term, gof.Variable):

/usr/local/anaconda3/envs/bayes_course_2020/lib/python3.7/site-packages/theano/gradient.py in access_term_cache(node)
   1019             inputs = node.inputs
   1020 
-> 1021             output_grads = [access_grad_cache(var) for var in node.outputs]
   1022 
   1023             # list of bools indicating if each output is connected to the cost

/usr/local/anaconda3/envs/bayes_course_2020/lib/python3.7/site-packages/theano/gradient.py in <listcomp>(.0)
   1019             inputs = node.inputs
   1020 
-> 1021             output_grads = [access_grad_cache(var) for var in node.outputs]
   1022 
   1023             # list of bools indicating if each output is connected to the cost

/usr/local/anaconda3/envs/bayes_course_2020/lib/python3.7/site-packages/theano/gradient.py in access_grad_cache(var)
   1324                     for idx in node_to_idx[node]:
   1325 
-> 1326                         term = access_term_cache(node)[idx]
   1327 
   1328                         if not isinstance(term, gof.Variable):

/usr/local/anaconda3/envs/bayes_course_2020/lib/python3.7/site-packages/theano/gradient.py in access_term_cache(node)
   1235                                 "%s.grad returned object of "
   1236                                 "shape %s as gradient term on input %d "
-> 1237                                 "of shape %s" % (node.op, t_shape, i, i_shape))
   1238 
   1239                 if not isinstance(term.type,

ValueError: Elemwise{sub,no_inplace}.grad returned object of shape (70,) as gradient term on input 0 of shape (100,)

Please provide any additional information below.

Versions and main components

  • PyMC3 Version: 3.9.2
  • Theano Version: 1.0.4 (git revision e0167f24ae896a2e956cdd99a629910cd717a299)
  • Python Version: 3.7.6 | packaged by conda-forge | (default, Jun 1 2020, 18:33:30) \n[Clang 9.0.1 ]
  • Operating system: MacOSX Catalina 10.15.6 (19G73) (uname -a gives: Darwin 19.6.0 Darwin Kernel Version 19.6.0: Sun Jul 5 00:43:10 PDT 2020; root:xnu-6153.141.1~9/RELEASE_X86_64 x86_64)
  • How did you install PyMC3: conda

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions