From e9c104705d130ff0bfe7d1fb5040e82487c43141 Mon Sep 17 00:00:00 2001 From: anton-d Date: Tue, 1 Apr 2014 14:44:40 +0200 Subject: [PATCH] DOC: documented that .apply(func) executes func twice on the first time Related issues are #2656, #2936 and #6753. --- doc/source/groupby.rst | 19 +++++++++++++++++++ pandas/core/frame.py | 8 ++++++++ pandas/core/groupby.py | 9 ++++++++- 3 files changed, 35 insertions(+), 1 deletion(-) diff --git a/doc/source/groupby.rst b/doc/source/groupby.rst index cc5ebc730f94a..7412d25b33125 100644 --- a/doc/source/groupby.rst +++ b/doc/source/groupby.rst @@ -663,6 +663,25 @@ The dimension of the returned result can also change: s s.apply(f) + +.. warning:: + + In the current implementation apply calls func twice on the + first group to decide whether it can take a fast or slow code + path. This can lead to unexpected behavior if func has + side-effects, as they will take effect twice for the first + group. + + .. ipython:: python + + d = DataFrame({"a":["x", "y"], "b":[1,2]}) + def identity(df): + print(df) + return df + + d.groupby("a").apply(identity) + + Other useful features --------------------- diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 5ecdd4d8b351d..3329483a61f5c 100755 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3302,6 +3302,14 @@ def apply(self, func, axis=0, broadcast=False, raw=False, reduce=None, array/series Additional keyword arguments will be passed as keywords to the function + Notes + ----- + In the current implementation apply calls func twice on the + first column/row to decide whether it can take a fast or slow + code path. This can lead to unexpected behavior if func has + side-effects, as they will take effect twice for the first + column/row. 
+ Examples -------- >>> df.apply(numpy.sqrt) # returns DataFrame diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index 996a691eca082..8fd49bd2fe5bd 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -547,7 +547,14 @@ def apply(self, func, *args, **kwargs): Notes ----- - See online documentation for full exposition on how to use apply + See online documentation for full exposition on how to use apply. + + In the current implementation apply calls func twice on the + first group to decide whether it can take a fast or slow code + path. This can lead to unexpected behavior if func has + side-effects, as they will take effect twice for the first + group. + See also --------