From c700144d9906d02289328f8d62e3e41ef4a1d29f Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Wed, 19 May 2021 11:26:51 +0100 Subject: [PATCH 1/3] [ArrowStringArray] PERF: bypass some padding code in _wrap_result --- pandas/core/strings/accessor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/strings/accessor.py b/pandas/core/strings/accessor.py index 43df34a7ecbb2..30b2f0e377c12 100644 --- a/pandas/core/strings/accessor.py +++ b/pandas/core/strings/accessor.py @@ -284,7 +284,7 @@ def cons_row(x): return [x] result = [cons_row(x) for x in result] - if result: + if result and not self._is_string: # propagate nan values to match longest sequence (GH 18450) max_len = max(len(x) for x in result) result = [ From 8e584a44de47620f4f1c7119af5255ec69cb97a8 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Wed, 19 May 2021 12:28:38 +0100 Subject: [PATCH 2/3] use mask --- pandas/core/strings/accessor.py | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/pandas/core/strings/accessor.py b/pandas/core/strings/accessor.py index 30b2f0e377c12..551e2c76f9782 100644 --- a/pandas/core/strings/accessor.py +++ b/pandas/core/strings/accessor.py @@ -277,19 +277,13 @@ def _wrap_result( # required when expand=True is explicitly specified # not needed when inferred - def cons_row(x): - if is_list_like(x): - return x - else: - return [x] + mask = isna(result) + result = [[x] if mask[i] else x for i, x in enumerate(result)] - result = [cons_row(x) for x in result] if result and not self._is_string: # propagate nan values to match longest sequence (GH 18450) max_len = max(len(x) for x in result) - result = [ - x * max_len if len(x) == 0 or x[0] is np.nan else x for x in result - ] + result = [x * max_len if mask[i] else x for i, x in enumerate(result)] if not isinstance(expand, bool): raise ValueError("expand must be True or False") From 1c883e0d50be2ba04dbb90312b4e651d7541a824 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Wed, 19 May 2021 12:39:46 +0100 Subject: [PATCH 3/3] Revert "use mask" This reverts commit 8e584a44de47620f4f1c7119af5255ec69cb97a8. --- pandas/core/strings/accessor.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/pandas/core/strings/accessor.py b/pandas/core/strings/accessor.py index 551e2c76f9782..30b2f0e377c12 100644 --- a/pandas/core/strings/accessor.py +++ b/pandas/core/strings/accessor.py @@ -277,13 +277,19 @@ def _wrap_result( # required when expand=True is explicitly specified # not needed when inferred - mask = isna(result) - result = [[x] if mask[i] else x for i, x in enumerate(result)] + def cons_row(x): + if is_list_like(x): + return x + else: + return [x] + result = [cons_row(x) for x in result] if result and not self._is_string: # propagate nan values to match longest sequence (GH 18450) max_len = max(len(x) for x in result) - result = [x * max_len if mask[i] else x for i, x in enumerate(result)] + result = [ + x * max_len if len(x) == 0 or x[0] is np.nan else x for x in result + ] if not isinstance(expand, bool): raise ValueError("expand must be True or False")