xref: #614
I'm sure this is possible with #606, so this issue is mostly just to document my attempt to get this working. Out of the box (1.22.0+9.gdb758d0f), attempting to pass an Arrow `Table` or `RecordBatch` results in a `TypeError`:

```
TypeError: no default __reduce__ due to non-trivial __cinit__
```
```python
from distributed import Client
import pandas as pd
import pyarrow as pa

client = Client()

df = pd.DataFrame({'A': list('abc'), 'B': [1, 2, 3]})
tbl = pa.Table.from_pandas(df, preserve_index=False)

def echo(arg):
    return arg
```

```python
>>> client.submit(echo, df).result().equals(df)
True
>>> client.submit(echo, tbl).result()
```
```
distributed.protocol.pickle - INFO - Failed to serialize (pyarrow.Table
A: string
B: int64
metadata
--------
{b'pandas': b'{"index_columns": [], "column_indexes": [], "columns": [{"name":'
b' "A", "field_name": "A", "pandas_type": "unicode", "numpy_type":'
b' "object", "metadata": null}, {"name": "B", "field_name": "B", "'
b'pandas_type": "int64", "numpy_type": "int64", "metadata": null}]'
b', "pandas_version": "0.23.1"}'},). Exception: no default __reduce__ due to non-trivial __cinit__
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
C:\Miniconda3\lib\site-packages\distributed\protocol\pickle.py in dumps(x)
37 try:
---> 38 result = pickle.dumps(x, protocol=pickle.HIGHEST_PROTOCOL)
39 if len(result) < 1000:
C:\Miniconda3\lib\site-packages\pyarrow\lib.cp36-win_amd64.pyd in pyarrow.lib.RecordBatch.__reduce_cython__()
TypeError: no default __reduce__ due to non-trivial __cinit__
During handling of the above exception, another exception occurred:
TypeError Traceback (most recent call last)
<ipython-input-48-4e8e2ea90e79> in <module>()
----> 1 client.submit(echo, tbl).result()
C:\Miniconda3\lib\site-packages\distributed\client.py in submit(self, func, *args, **kwargs)
1236 resources={skey: resources} if resources else None,
1237 retries=retries,
-> 1238 fifo_timeout=fifo_timeout)
1239
1240 logger.debug("Submit %s(...), %s", funcname(func), key)
C:\Miniconda3\lib\site-packages\distributed\client.py in _graph_to_futures(self, dsk, keys, restrictions, loose_restrictions, priority, user_priority, resources, retries, fifo_timeout)
2093
2094 self._send_to_scheduler({'op': 'update-graph',
-> 2095 'tasks': valmap(dumps_task, dsk3),
2096 'dependencies': dependencies,
2097 'keys': list(flatkeys),
C:\Miniconda3\lib\site-packages\cytoolz\dicttoolz.pyx in cytoolz.dicttoolz.valmap()
C:\Miniconda3\lib\site-packages\cytoolz\dicttoolz.pyx in cytoolz.dicttoolz.valmap()
C:\Miniconda3\lib\site-packages\distributed\worker.py in dumps_task(task)
799 elif not any(map(_maybe_complex, task[1:])):
800 return {'function': dumps_function(task[0]),
--> 801 'args': warn_dumps(task[1:])}
802 return to_serialize(task)
803
C:\Miniconda3\lib\site-packages\distributed\worker.py in warn_dumps(obj, dumps, limit)
808 def warn_dumps(obj, dumps=pickle.dumps, limit=1e6):
809 """ Dump an object to bytes, warn if those bytes are large """
--> 810 b = dumps(obj)
811 if not _warn_dumps_warned[0] and len(b) > limit:
812 _warn_dumps_warned[0] = True
C:\Miniconda3\lib\site-packages\distributed\protocol\pickle.py in dumps(x)
49 except Exception:
50 try:
---> 51 return cloudpickle.dumps(x, protocol=pickle.HIGHEST_PROTOCOL)
52 except Exception as e:
53 logger.info("Failed to serialize %s. Exception: %s", x, e)
C:\Miniconda3\lib\site-packages\cloudpickle\cloudpickle.py in dumps(obj, protocol)
893 try:
894 cp = CloudPickler(file, protocol=protocol)
--> 895 cp.dump(obj)
896 return file.getvalue()
897 finally:
C:\Miniconda3\lib\site-packages\cloudpickle\cloudpickle.py in dump(self, obj)
266 self.inject_addons()
267 try:
--> 268 return Pickler.dump(self, obj)
269 except RuntimeError as e:
270 if 'recursion' in e.args[0]:
C:\Miniconda3\lib\pickle.py in dump(self, obj)
407 if self.proto >= 4:
408 self.framer.start_framing()
--> 409 self.save(obj)
410 self.write(STOP)
411 self.framer.end_framing()
C:\Miniconda3\lib\pickle.py in save(self, obj, save_persistent_id)
474 f = self.dispatch.get(t)
475 if f is not None:
--> 476 f(self, obj) # Call unbound method with explicit self
477 return
478
C:\Miniconda3\lib\pickle.py in save_tuple(self, obj)
734 if n <= 3 and self.proto >= 2:
735 for element in obj:
--> 736 save(element)
737 # Subtle. Same as in the big comment below.
738 if id(obj) in memo:
C:\Miniconda3\lib\pickle.py in save(self, obj, save_persistent_id)
494 reduce = getattr(obj, "__reduce_ex__", None)
495 if reduce is not None:
--> 496 rv = reduce(self.proto)
497 else:
498 reduce = getattr(obj, "__reduce__", None)
C:\Miniconda3\lib\site-packages\pyarrow\lib.cp36-win_amd64.pyd in pyarrow.lib.RecordBatch.__reduce_cython__()
TypeError: no default __reduce__ due to non-trivial __cinit__
```
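As a stopgap, one workaround is to hand dask plain bytes and rebuild the Table on the worker, since `bytes` pickles fine. The sketch below is untested and the helper names (`table_to_bytes`, `bytes_to_table`, `echo_table`) are mine, but the pyarrow IPC calls it relies on (`BufferOutputStream`, `RecordBatchStreamWriter`, `ipc.open_stream`) are standard:

```python
from distributed import Client
import pandas as pd
import pyarrow as pa

def table_to_bytes(tbl):
    # Write the Table out as an Arrow IPC stream and grab the raw bytes.
    sink = pa.BufferOutputStream()
    writer = pa.RecordBatchStreamWriter(sink, tbl.schema)
    writer.write_table(tbl)
    writer.close()
    return sink.getvalue().to_pybytes()

def bytes_to_table(buf):
    # Rebuild a Table from the IPC stream bytes.
    return pa.ipc.open_stream(buf).read_all()

def echo_table(buf):
    tbl = bytes_to_table(buf)   # reconstruct the Table on the worker
    return table_to_bytes(tbl)  # and send it back the same way

client = Client()
df = pd.DataFrame({'A': list('abc'), 'B': [1, 2, 3]})
tbl = pa.Table.from_pandas(df, preserve_index=False)

result = bytes_to_table(client.submit(echo_table, table_to_bytes(tbl)).result())
assert result.equals(tbl)
```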
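Longer term, the custom serialization machinery from #606 should make this transparent. A rough sketch of what registering `Table` serializers might look like, assuming distributed exposes a `register_serialization(cls, serialize, deserialize)` hook (the exact name and signature may differ between versions):

```python
import pyarrow as pa
from distributed.protocol.serialize import register_serialization

def serialize_table(tbl):
    # Encode the Table as a single Arrow IPC stream frame.
    sink = pa.BufferOutputStream()
    writer = pa.RecordBatchStreamWriter(sink, tbl.schema)
    writer.write_table(tbl)
    writer.close()
    header = {}                              # no extra metadata needed
    frames = [sink.getvalue().to_pybytes()]  # one frame: the IPC stream
    return header, frames

def deserialize_table(header, frames):
    return pa.ipc.open_stream(frames[0]).read_all()

register_serialization(pa.Table, serialize_table, deserialize_table)
```

For this to help, the registration would presumably need to happen on the workers as well as the client, and (judging from the traceback above, where task arguments go straight through `pickle.dumps`) large tables are probably better scattered than embedded directly in the task graph.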