Description
I'm following the example for pm.Data
given here: https://medium.com/@pymc_devs/pymc-3-7-making-data-a-first-class-citizen-7ed87fe4bcc5.
The following adaptation works. Note that I have scaled spin_rate in the DataFrame rather than scaling it when calculating the latent θ; to compensate, I have written (spin / 1) instead of (spin / 1000).
import pandas as pd
import pymc3 as pm

curveball_data = pd.DataFrame({'spin_rate': [1.9171, 1.8673, 1.9847, 1.8208, 1.9509],
                               'miss': [0, 0, 1, 0, 1]})

with pm.Model() as curve_spin_model:
    spin = pm.Data('spin', curveball_data['spin_rate'])
    β = pm.Normal('β', shape=2)
    θ = β[0] + β[1] * (spin / 1)
    swing_miss = pm.Data('swing_miss', curveball_data['miss'])
    miss = pm.Bernoulli('miss', pm.invlogit(θ), observed=swing_miss)
    trace = pm.sample()
other_data = pd.DataFrame({'spin_rate': [1.9171, 1.8673, 1.9847, 1.8208, 1.9509, 1.8888],
                           'miss': [0, 0, 1, 0, 1, 1]})

with curve_spin_model:
    pm.set_data({'spin': other_data.spin_rate,
                 'swing_miss': other_data.miss})
    newtrace = pm.sample()
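Incidentally, pm.Data returns a Theano shared variable, so one can check that the containers really hold the new values after set_data; a quick sanity-check sketch (the expected shapes are my assumption):

# Both shared variables should report the new length after set_data.
print(spin.get_value().shape)        # expected (6,)
print(swing_miss.get_value().shape)  # expected (6,)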
Now, surely dividing spin by 1 is unnecessary, so we can replace that line with:

θ = β[0] + β[1] * spin
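For reference, here is the failing variant in full; it is identical to the model above except for the θ line:

with pm.Model() as curve_spin_model:
    spin = pm.Data('spin', curveball_data['spin_rate'])
    β = pm.Normal('β', shape=2)
    θ = β[0] + β[1] * spin  # the only changed line
    swing_miss = pm.Data('swing_miss', curveball_data['miss'])
    miss = pm.Bernoulli('miss', pm.invlogit(θ), observed=swing_miss)
    trace = pm.sample()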
But when I do this (keeping every other line exactly the same) and re-run the set_data block, I get the following error:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-3-6d4ab2cea754> in <module>
19 pm.set_data({'spin': other_data.spin_rate,
20 'swing_miss': other_data.miss})
---> 21 newtrace = pm.sample()
~/pymc37/lib/python3.6/site-packages/pymc3/sampling.py in sample(draws, step, init, n_init, start, trace, chain_idx, chains, cores, tune, progressbar, model, random_seed, discard_tuned_samples, compute_convergence_checks, **kwargs)
394 start_, step = init_nuts(init=init, chains=chains, n_init=n_init,
395 model=model, random_seed=random_seed,
--> 396 progressbar=progressbar, **kwargs)
397 if start is None:
398 start = start_
~/pymc37/lib/python3.6/site-packages/pymc3/sampling.py in init_nuts(init, chains, n_init, model, random_seed, progressbar, **kwargs)
1513 'Unknown initializer: {}.'.format(init))
1514
-> 1515 step = pm.NUTS(potential=potential, model=model, **kwargs)
1516
1517 return start, step
~/pymc37/lib/python3.6/site-packages/pymc3/step_methods/hmc/nuts.py in __init__(self, vars, max_treedepth, early_max_treedepth, **kwargs)
150 `pm.sample` to the desired number of tuning steps.
151 """
--> 152 super().__init__(vars, **kwargs)
153
154 self.max_treedepth = max_treedepth
~/pymc37/lib/python3.6/site-packages/pymc3/step_methods/hmc/base_hmc.py in __init__(self, vars, scaling, step_scale, is_cov, model, blocked, potential, dtype, Emax, target_accept, gamma, k, t0, adapt_step_size, step_rand, **theano_kwargs)
70 vars = inputvars(vars)
71
---> 72 super().__init__(vars, blocked=blocked, model=model, dtype=dtype, **theano_kwargs)
73
74 self.adapt_step_size = adapt_step_size
~/pymc37/lib/python3.6/site-packages/pymc3/step_methods/arraystep.py in __init__(self, vars, model, blocked, dtype, **theano_kwargs)
226
227 func = model.logp_dlogp_function(
--> 228 vars, dtype=dtype, **theano_kwargs)
229
230 # handle edge case discovered in #2948
~/pymc37/lib/python3.6/site-packages/pymc3/model.py in logp_dlogp_function(self, grad_vars, **kwargs)
721 varnames = [var.name for var in grad_vars]
722 extra_vars = [var for var in self.free_RVs if var.name not in varnames]
--> 723 return ValueGradFunction(self.logpt, grad_vars, extra_vars, **kwargs)
724
725 @property
~/pymc37/lib/python3.6/site-packages/pymc3/model.py in __init__(self, cost, grad_vars, extra_vars, dtype, casting, **kwargs)
454 self._cost, grad_vars, self._ordering.vmap)
455
--> 456 grad = tt.grad(self._cost_joined, self._vars_joined)
457 grad.name = '__grad'
458
~/pymc37/lib/python3.6/site-packages/theano/gradient.py in grad(cost, wrt, consider_constant, disconnected_inputs, add_names, known_grads, return_disconnected, null_gradients)
603
604 rval = _populate_grad_dict(var_to_app_to_idx,
--> 605 grad_dict, wrt, cost_name)
606
607 for i in xrange(len(rval)):
~/pymc37/lib/python3.6/site-packages/theano/gradient.py in _populate_grad_dict(var_to_app_to_idx, grad_dict, wrt, cost_name)
1369 return grad_dict[var]
1370
-> 1371 rval = [access_grad_cache(elem) for elem in wrt]
1372
1373 return rval
~/pymc37/lib/python3.6/site-packages/theano/gradient.py in <listcomp>(.0)
1369 return grad_dict[var]
1370
-> 1371 rval = [access_grad_cache(elem) for elem in wrt]
1372
1373 return rval
~/pymc37/lib/python3.6/site-packages/theano/gradient.py in access_grad_cache(var)
1324 for idx in node_to_idx[node]:
1325
-> 1326 term = access_term_cache(node)[idx]
1327
1328 if not isinstance(term, gof.Variable):
~/pymc37/lib/python3.6/site-packages/theano/gradient.py in access_term_cache(node)
1019 inputs = node.inputs
1020
-> 1021 output_grads = [access_grad_cache(var) for var in node.outputs]
1022
1023 # list of bools indicating if each output is connected to the cost
~/pymc37/lib/python3.6/site-packages/theano/gradient.py in <listcomp>(.0)
1019 inputs = node.inputs
1020
-> 1021 output_grads = [access_grad_cache(var) for var in node.outputs]
1022
1023 # list of bools indicating if each output is connected to the cost
~/pymc37/lib/python3.6/site-packages/theano/gradient.py in access_grad_cache(var)
1324 for idx in node_to_idx[node]:
1325
-> 1326 term = access_term_cache(node)[idx]
1327
1328 if not isinstance(term, gof.Variable):
~/pymc37/lib/python3.6/site-packages/theano/gradient.py in access_term_cache(node)
1019 inputs = node.inputs
1020
-> 1021 output_grads = [access_grad_cache(var) for var in node.outputs]
1022
1023 # list of bools indicating if each output is connected to the cost
~/pymc37/lib/python3.6/site-packages/theano/gradient.py in <listcomp>(.0)
1019 inputs = node.inputs
1020
-> 1021 output_grads = [access_grad_cache(var) for var in node.outputs]
1022
1023 # list of bools indicating if each output is connected to the cost
~/pymc37/lib/python3.6/site-packages/theano/gradient.py in access_grad_cache(var)
1324 for idx in node_to_idx[node]:
1325
-> 1326 term = access_term_cache(node)[idx]
1327
1328 if not isinstance(term, gof.Variable):
~/pymc37/lib/python3.6/site-packages/theano/gradient.py in access_term_cache(node)
1019 inputs = node.inputs
1020
-> 1021 output_grads = [access_grad_cache(var) for var in node.outputs]
1022
1023 # list of bools indicating if each output is connected to the cost
~/pymc37/lib/python3.6/site-packages/theano/gradient.py in <listcomp>(.0)
1019 inputs = node.inputs
1020
-> 1021 output_grads = [access_grad_cache(var) for var in node.outputs]
1022
1023 # list of bools indicating if each output is connected to the cost
~/pymc37/lib/python3.6/site-packages/theano/gradient.py in access_grad_cache(var)
1324 for idx in node_to_idx[node]:
1325
-> 1326 term = access_term_cache(node)[idx]
1327
1328 if not isinstance(term, gof.Variable):
~/pymc37/lib/python3.6/site-packages/theano/gradient.py in access_term_cache(node)
1019 inputs = node.inputs
1020
-> 1021 output_grads = [access_grad_cache(var) for var in node.outputs]
1022
1023 # list of bools indicating if each output is connected to the cost
~/pymc37/lib/python3.6/site-packages/theano/gradient.py in <listcomp>(.0)
1019 inputs = node.inputs
1020
-> 1021 output_grads = [access_grad_cache(var) for var in node.outputs]
1022
1023 # list of bools indicating if each output is connected to the cost
~/pymc37/lib/python3.6/site-packages/theano/gradient.py in access_grad_cache(var)
1324 for idx in node_to_idx[node]:
1325
-> 1326 term = access_term_cache(node)[idx]
1327
1328 if not isinstance(term, gof.Variable):
~/pymc37/lib/python3.6/site-packages/theano/gradient.py in access_term_cache(node)
1160
1161 input_grads = node.op.L_op(inputs, node.outputs,
-> 1162 new_output_grads)
1163
1164 if input_grads is None:
~/pymc37/lib/python3.6/site-packages/theano/tensor/elemwise.py in L_op(self, inputs, outs, ograds)
541
542 # compute grad with respect to broadcasted input
--> 543 rval = self._bgrad(inputs, outs, ograds)
544
545 # TODO: make sure that zeros are clearly identifiable
~/pymc37/lib/python3.6/site-packages/theano/tensor/elemwise.py in _bgrad(self, inputs, outputs, ograds)
641 ret.append(None)
642 continue
--> 643 ret.append(transform(scalar_igrad))
644
645 return ret
~/pymc37/lib/python3.6/site-packages/theano/tensor/elemwise.py in transform(r)
633
634 new_r = Elemwise(node.op, {})(
--> 635 *[transform(ipt) for ipt in node.inputs])
636 return new_r
637 ret = []
~/pymc37/lib/python3.6/site-packages/theano/gof/op.py in __call__(self, *inputs, **kwargs)
672 thunk.outputs = [storage_map[v] for v in node.outputs]
673
--> 674 required = thunk()
675 assert not required # We provided all inputs
676
~/pymc37/lib/python3.6/site-packages/theano/gof/op.py in rval()
860
861 def rval():
--> 862 thunk()
863 for o in node.outputs:
864 compute_map[o][0] = True
~/pymc37/lib/python3.6/site-packages/theano/gof/cc.py in __call__(self)
1737 print(self.error_storage, file=sys.stderr)
1738 raise
-> 1739 reraise(exc_type, exc_value, exc_trace)
1740
1741
~/pymc37/lib/python3.6/site-packages/six.py in reraise(tp, value, tb)
691 if value.__traceback__ is not tb:
692 raise value.with_traceback(tb)
--> 693 raise value
694 finally:
695 value = None
ValueError: Input dimension mis-match. (input[0].shape[0] = 5, input[1].shape[0] = 6)
Here, 6 is the length of the new data and 5 is the length of the old data.
This seems like a rather bizarre error. Any ideas what's going on?
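Reading the traceback, the failure happens while tt.grad builds the gradient graph: Theano evaluates a thunk along the way (computing test values, I assume), and one input still carries the old length-5 shape while the other has the new length 6. So it looks like a stale cached shape somewhere in the graph, rather than bad data. In the meantime, keeping a no-op transform on spin is a workable, if ugly, workaround; a minimal sketch (only the division form is the one verified above, the multiplication is an untested guess in the same spirit):

# Any arithmetic on the shared variable seems to insert a fresh
# tensor between the pm.Data container and the rest of the graph.
θ = β[0] + β[1] * (spin / 1)    # verified to work above
# θ = β[0] + β[1] * spin * 1.0  # untested guess, same idea
# θ = β[0] + β[1] * spin        # fails after set_data + sample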
Versions and main components
- PyMC3 Version: 3.7
- Theano Version: 1.0.4
- Python Version: 3.6.5
- Operating system: macOS Mojave 10.14.4
- How did you install PyMC3: pip