Skip to content

Commit a0263b0

Browse files
authored
Add Result.to_df to export records as pandas DataFrame (#678)
Backport of #663
1 parent c11f9d2 commit a0263b0

File tree

4 files changed

+91
-9
lines changed

4 files changed

+91
-9
lines changed

docs/source/api.rst

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -745,15 +745,13 @@ A :class:`neo4j.Result` is attached to an active connection, through a :class:`n
745745

746746
.. automethod:: graph
747747

748-
**This is experimental.** (See :ref:`filter-warnings-ref`)
749-
750748
.. automethod:: value
751749

752750
.. automethod:: values
753751

754752
.. automethod:: data
755753

756-
See https://neo4j.com/docs/driver-manual/current/cypher-workflow/#driver-type-mapping for more about type mapping.
754+
See https://neo4j.com/docs/python-manual/current/cypher-workflow/#python-driver-type-mapping for more about type mapping.
757755

758756

759757
Graph

neo4j/work/result.py

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,8 @@
2424

2525
from neo4j.data import DataDehydrator
2626
from neo4j.io import ConnectionErrorHandler
27+
from neo4j.meta import experimental
2728
from neo4j.work.summary import ResultSummary
28-
from neo4j.exceptions import ResultConsumedError
2929

3030

3131
class Result:
@@ -335,6 +335,8 @@ def graph(self):
335335
336336
:returns: a result graph
337337
:rtype: :class:`neo4j.graph.Graph`
338+
339+
**This is experimental.** (See :ref:`filter-warnings-ref`)
338340
"""
339341
self._buffer_all()
340342
return self._hydrant.graph
@@ -372,3 +374,28 @@ def data(self, *keys):
372374
:rtype: list
373375
"""
374376
return [record.data(*keys) for record in self]
377+
378+
@experimental("pandas support is experimental and might be changed or "
379+
"removed in future versions")
380+
def to_df(self):
381+
"""Convert (the rest of) the result to a pandas DataFrame.
382+
383+
This method is only available if the ``pandas`` library is installed.
384+
385+
``tx.run("UNWIND range(1, 10) AS n RETURN n, n+1 as m").to_df()``, for
386+
instance, will return a DataFrame with two columns (``n`` and ``m``) and
387+
10 rows.
388+
389+
:rtype: :py:class:`pandas.DataFrame`
390+
:raises ImportError: if the ``pandas`` library is not available.
391+
392+
.. versionadded:: 5.0
393+
This method was backported from 5.0 for preview purposes.
394+
395+
**This is experimental.**
396+
``pandas`` support might be changed or removed in future versions
397+
without warning. (See :ref:`filter-warnings-ref`)
398+
"""
399+
import pandas as pd
400+
401+
return pd.DataFrame(self.values(), columns=self._keys)

tests/requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,3 +5,4 @@ pytest-benchmark
55
pytest-cov
66
pytest-mock
77
teamcity-messages
8+
pandas>=1.0.0

tests/unit/work/test_result.py

Lines changed: 61 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121

2222
from unittest import mock
2323

24+
import pandas as pd
2425
import pytest
2526

2627
from neo4j import (
@@ -31,16 +32,20 @@
3132
SummaryCounters,
3233
Version,
3334
)
34-
from neo4j.data import DataHydrator
35+
from neo4j.data import (
36+
DataHydrator,
37+
Node,
38+
Relationship,
39+
)
40+
from neo4j.packstream import Structure
3541
from neo4j.work.result import Result
3642

3743

3844
class Records:
3945
def __init__(self, fields, records):
40-
assert all(len(fields) == len(r) for r in records)
41-
self.fields = fields
42-
# self.records = [{"record_values": r} for r in records]
43-
self.records = records
46+
self.fields = tuple(fields)
47+
self.records = tuple(records)
48+
assert all(len(self.fields) == len(r) for r in self.records)
4449

4550
def __len__(self):
4651
return self.records.__len__()
@@ -422,3 +427,54 @@ def test_data(num_records):
422427
assert result.data("hello", "world") == expected_data
423428
for record in records:
424429
assert record.data.called_once_with("hello", "world")
430+
431+
432+
@pytest.mark.parametrize(
433+
("keys", "values", "types", "instances"),
434+
(
435+
(["i"], zip(range(5)), ["int64"], None),
436+
(["x"], zip((n - .5) / 5 for n in range(5)), ["float64"], None),
437+
(["s"], zip(("foo", "bar", "baz", "foobar")), ["object"], None),
438+
(["l"], zip(([1, 2], [3, 4])), ["object"], None),
439+
(
440+
["n"],
441+
zip((
442+
Structure(b"N", 0, ["LABEL_A"], {"a": 1, "b": 2}),
443+
Structure(b"N", 2, ["LABEL_B"], {"a": 1, "c": 1.2}),
444+
Structure(b"N", 1, ["LABEL_A", "LABEL_B"], {"a": [1, "a"]}),
445+
)),
446+
["object"],
447+
[Node]
448+
),
449+
(
450+
["r"],
451+
zip((
452+
Structure(b"R", 0, 1, 2, "TYPE", {"a": 1, "b": 2}),
453+
Structure(b"R", 420, 1337, 69, "HYPE", {"all memes": True}),
454+
)),
455+
["object"],
456+
[Relationship]
457+
),
458+
)
459+
)
460+
def test_to_df(keys, values, types, instances):
461+
values = list(values)
462+
connection = ConnectionStub(records=Records(keys, values))
463+
result = Result(connection, DataHydrator(), 1, noop, noop)
464+
result._run("CYPHER", {}, None, None, "r", None)
465+
df = result.to_df()
466+
467+
assert isinstance(df, pd.DataFrame)
468+
assert df.keys().to_list() == keys
469+
assert len(df) == len(values)
470+
assert df.dtypes.to_list() == types
471+
472+
expected_df = pd.DataFrame(
473+
{k: [v[i] for v in values] for i, k in enumerate(keys)}
474+
)
475+
476+
if instances:
477+
for i, k in enumerate(keys):
478+
assert all(isinstance(v, instances[i]) for v in df[k])
479+
else:
480+
assert df.equals(expected_df)

0 commit comments

Comments
 (0)