Skip to content

Shape mismatch when using pm.Data #3631

Closed
@maxliving

Description

@maxliving

I'm following the example for pm.Data given here: https://medium.com/@pymc_devs/pymc-3-7-making-data-a-first-class-citizen-7ed87fe4bcc5.

The following adaptation works. Note that I have scaled spin_rate in the DataFrame, rather than scaling it when calculating the latent θ. To compensate I have specified (spin / 1) instead of (spin / 1000).

curveball_data = pd.DataFrame({'spin_rate': [1.9171, 1.8673, 1.9847, 1.8208, 1.9509],
                              'miss': [0, 0, 1, 0, 1]})

with pm.Model() as curve_spin_model:

    spin = pm.Data('spin', curveball_data['spin_rate'])
    β = pm.Normal('β', shape=2)
    
    θ = β[0] + β[1] * (spin / 1)
    
    swing_miss = pm.Data('swing_miss', curveball_data['miss'])
    miss = pm.Bernoulli('miss', pm.invlogit(θ), observed=swing_miss)

    trace = pm.sample()

other_data = pd.DataFrame({'spin_rate': [1.9171, 1.8673, 1.9847, 1.8208, 1.9509, 1.8888],
                              'miss':   [0, 0, 1, 0, 1, 1]})
with curve_spin_model:
    pm.set_data({'spin': other_data.spin_rate, 
                'swing_miss': other_data.miss})
    newtrace = pm.sample()

Now surely, dividing spin by 1 is unnecessary, so we can replace that line with:
θ = β[0] + β[1] * spin. But, when I do this (keeping every other line exactly the same), I get the following error:

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-3-6d4ab2cea754> in <module>
     19     pm.set_data({'spin': other_data.spin_rate, 
     20                 'swing_miss': other_data.miss})
---> 21     newtrace = pm.sample()

~/pymc37/lib/python3.6/site-packages/pymc3/sampling.py in sample(draws, step, init, n_init, start, trace, chain_idx, chains, cores, tune, progressbar, model, random_seed, discard_tuned_samples, compute_convergence_checks, **kwargs)
    394                 start_, step = init_nuts(init=init, chains=chains, n_init=n_init,
    395                                          model=model, random_seed=random_seed,
--> 396                                          progressbar=progressbar, **kwargs)
    397                 if start is None:
    398                     start = start_

~/pymc37/lib/python3.6/site-packages/pymc3/sampling.py in init_nuts(init, chains, n_init, model, random_seed, progressbar, **kwargs)
   1513             'Unknown initializer: {}.'.format(init))
   1514 
-> 1515     step = pm.NUTS(potential=potential, model=model, **kwargs)
   1516 
   1517     return start, step

~/pymc37/lib/python3.6/site-packages/pymc3/step_methods/hmc/nuts.py in __init__(self, vars, max_treedepth, early_max_treedepth, **kwargs)
    150         `pm.sample` to the desired number of tuning steps.
    151         """
--> 152         super().__init__(vars, **kwargs)
    153 
    154         self.max_treedepth = max_treedepth

~/pymc37/lib/python3.6/site-packages/pymc3/step_methods/hmc/base_hmc.py in __init__(self, vars, scaling, step_scale, is_cov, model, blocked, potential, dtype, Emax, target_accept, gamma, k, t0, adapt_step_size, step_rand, **theano_kwargs)
     70         vars = inputvars(vars)
     71 
---> 72         super().__init__(vars, blocked=blocked, model=model, dtype=dtype, **theano_kwargs)
     73 
     74         self.adapt_step_size = adapt_step_size

~/pymc37/lib/python3.6/site-packages/pymc3/step_methods/arraystep.py in __init__(self, vars, model, blocked, dtype, **theano_kwargs)
    226 
    227         func = model.logp_dlogp_function(
--> 228             vars, dtype=dtype, **theano_kwargs)
    229 
    230         # handle edge case discovered in #2948

~/pymc37/lib/python3.6/site-packages/pymc3/model.py in logp_dlogp_function(self, grad_vars, **kwargs)
    721         varnames = [var.name for var in grad_vars]
    722         extra_vars = [var for var in self.free_RVs if var.name not in varnames]
--> 723         return ValueGradFunction(self.logpt, grad_vars, extra_vars, **kwargs)
    724 
    725     @property

~/pymc37/lib/python3.6/site-packages/pymc3/model.py in __init__(self, cost, grad_vars, extra_vars, dtype, casting, **kwargs)
    454             self._cost, grad_vars, self._ordering.vmap)
    455 
--> 456         grad = tt.grad(self._cost_joined, self._vars_joined)
    457         grad.name = '__grad'
    458 

~/pymc37/lib/python3.6/site-packages/theano/gradient.py in grad(cost, wrt, consider_constant, disconnected_inputs, add_names, known_grads, return_disconnected, null_gradients)
    603 
    604     rval = _populate_grad_dict(var_to_app_to_idx,
--> 605                                grad_dict, wrt, cost_name)
    606 
    607     for i in xrange(len(rval)):

~/pymc37/lib/python3.6/site-packages/theano/gradient.py in _populate_grad_dict(var_to_app_to_idx, grad_dict, wrt, cost_name)
   1369         return grad_dict[var]
   1370 
-> 1371     rval = [access_grad_cache(elem) for elem in wrt]
   1372 
   1373     return rval

~/pymc37/lib/python3.6/site-packages/theano/gradient.py in <listcomp>(.0)
   1369         return grad_dict[var]
   1370 
-> 1371     rval = [access_grad_cache(elem) for elem in wrt]
   1372 
   1373     return rval

~/pymc37/lib/python3.6/site-packages/theano/gradient.py in access_grad_cache(var)
   1324                     for idx in node_to_idx[node]:
   1325 
-> 1326                         term = access_term_cache(node)[idx]
   1327 
   1328                         if not isinstance(term, gof.Variable):

~/pymc37/lib/python3.6/site-packages/theano/gradient.py in access_term_cache(node)
   1019             inputs = node.inputs
   1020 
-> 1021             output_grads = [access_grad_cache(var) for var in node.outputs]
   1022 
   1023             # list of bools indicating if each output is connected to the cost

~/pymc37/lib/python3.6/site-packages/theano/gradient.py in <listcomp>(.0)
   1019             inputs = node.inputs
   1020 
-> 1021             output_grads = [access_grad_cache(var) for var in node.outputs]
   1022 
   1023             # list of bools indicating if each output is connected to the cost

~/pymc37/lib/python3.6/site-packages/theano/gradient.py in access_grad_cache(var)
   1324                     for idx in node_to_idx[node]:
   1325 
-> 1326                         term = access_term_cache(node)[idx]
   1327 
   1328                         if not isinstance(term, gof.Variable):

~/pymc37/lib/python3.6/site-packages/theano/gradient.py in access_term_cache(node)
   1019             inputs = node.inputs
   1020 
-> 1021             output_grads = [access_grad_cache(var) for var in node.outputs]
   1022 
   1023             # list of bools indicating if each output is connected to the cost

~/pymc37/lib/python3.6/site-packages/theano/gradient.py in <listcomp>(.0)
   1019             inputs = node.inputs
   1020 
-> 1021             output_grads = [access_grad_cache(var) for var in node.outputs]
   1022 
   1023             # list of bools indicating if each output is connected to the cost

~/pymc37/lib/python3.6/site-packages/theano/gradient.py in access_grad_cache(var)
   1324                     for idx in node_to_idx[node]:
   1325 
-> 1326                         term = access_term_cache(node)[idx]
   1327 
   1328                         if not isinstance(term, gof.Variable):

~/pymc37/lib/python3.6/site-packages/theano/gradient.py in access_term_cache(node)
   1019             inputs = node.inputs
   1020 
-> 1021             output_grads = [access_grad_cache(var) for var in node.outputs]
   1022 
   1023             # list of bools indicating if each output is connected to the cost

~/pymc37/lib/python3.6/site-packages/theano/gradient.py in <listcomp>(.0)
   1019             inputs = node.inputs
   1020 
-> 1021             output_grads = [access_grad_cache(var) for var in node.outputs]
   1022 
   1023             # list of bools indicating if each output is connected to the cost

~/pymc37/lib/python3.6/site-packages/theano/gradient.py in access_grad_cache(var)
   1324                     for idx in node_to_idx[node]:
   1325 
-> 1326                         term = access_term_cache(node)[idx]
   1327 
   1328                         if not isinstance(term, gof.Variable):

~/pymc37/lib/python3.6/site-packages/theano/gradient.py in access_term_cache(node)
   1019             inputs = node.inputs
   1020 
-> 1021             output_grads = [access_grad_cache(var) for var in node.outputs]
   1022 
   1023             # list of bools indicating if each output is connected to the cost

~/pymc37/lib/python3.6/site-packages/theano/gradient.py in <listcomp>(.0)
   1019             inputs = node.inputs
   1020 
-> 1021             output_grads = [access_grad_cache(var) for var in node.outputs]
   1022 
   1023             # list of bools indicating if each output is connected to the cost

~/pymc37/lib/python3.6/site-packages/theano/gradient.py in access_grad_cache(var)
   1324                     for idx in node_to_idx[node]:
   1325 
-> 1326                         term = access_term_cache(node)[idx]
   1327 
   1328                         if not isinstance(term, gof.Variable):

~/pymc37/lib/python3.6/site-packages/theano/gradient.py in access_term_cache(node)
   1160 
   1161                 input_grads = node.op.L_op(inputs, node.outputs,
-> 1162                                            new_output_grads)
   1163 
   1164                 if input_grads is None:

~/pymc37/lib/python3.6/site-packages/theano/tensor/elemwise.py in L_op(self, inputs, outs, ograds)
    541 
    542         # compute grad with respect to broadcasted input
--> 543         rval = self._bgrad(inputs, outs, ograds)
    544 
    545         # TODO: make sure that zeros are clearly identifiable

~/pymc37/lib/python3.6/site-packages/theano/tensor/elemwise.py in _bgrad(self, inputs, outputs, ograds)
    641                 ret.append(None)
    642                 continue
--> 643             ret.append(transform(scalar_igrad))
    644 
    645         return ret

~/pymc37/lib/python3.6/site-packages/theano/tensor/elemwise.py in transform(r)
    633 
    634             new_r = Elemwise(node.op, {})(
--> 635                 *[transform(ipt) for ipt in node.inputs])
    636             return new_r
    637         ret = []

~/pymc37/lib/python3.6/site-packages/theano/gof/op.py in __call__(self, *inputs, **kwargs)
    672                 thunk.outputs = [storage_map[v] for v in node.outputs]
    673 
--> 674                 required = thunk()
    675                 assert not required  # We provided all inputs
    676 

~/pymc37/lib/python3.6/site-packages/theano/gof/op.py in rval()
    860 
    861         def rval():
--> 862             thunk()
    863             for o in node.outputs:
    864                 compute_map[o][0] = True

~/pymc37/lib/python3.6/site-packages/theano/gof/cc.py in __call__(self)
   1737                 print(self.error_storage, file=sys.stderr)
   1738                 raise
-> 1739             reraise(exc_type, exc_value, exc_trace)
   1740 
   1741 

~/pymc37/lib/python3.6/site-packages/six.py in reraise(tp, value, tb)
    691             if value.__traceback__ is not tb:
    692                 raise value.with_traceback(tb)
--> 693             raise value
    694         finally:
    695             value = None

ValueError: Input dimension mis-match. (input[0].shape[0] = 5, input[1].shape[0] = 6)

Here, 6 is the length of the new data and 5 is the length of the old data.

This seems like a rather bizarre error. Any ideas what's going on?

Versions and main components

  • PyMC3 Version: 3.7
  • Theano Version: 1.0.4
  • Python Version: 3.6.5
  • Operating system: OS Mojave 10.14.4
  • How did you install PyMC3: pip

Metadata

Assignees

No one assigned

    Labels

    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions