BUG: to_numpy not respecting na_value before converting to array (#50506)

phofl · web-flow · commit 60dc3f544178 · 2023-01-05T15:41:24.000-08:00
* BUG: to_numpy now fills na_value into the values before converting them to a NumPy array of the requested dtype

* Adjust whatsnew
diff --git a/asv_bench/benchmarks/series_methods.py b/asv_bench/benchmarks/series_methods.py
@@ -382,4 +382,23 @@ def time_iter(self, dtype):
             pass
 
 
+class ToNumpy:
+    def setup(self):
+        N = 1_000_000
+        self.ser = Series(
+            np.random.randn(
+                N,
+            )
+        )
+
+    def time_to_numpy(self):
+        self.ser.to_numpy()
+
+    def time_to_numpy_double_copy(self):
+        self.ser.to_numpy(dtype="float64", copy=True)
+
+    def time_to_numpy_copy(self):
+        self.ser.to_numpy(copy=True)
+
+
 from .pandas_vb_common import setup  # noqa: F401 isort:skip
diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst
@@ -780,6 +780,7 @@ Performance improvements
 - Performance improvement when iterating over pyarrow and nullable dtypes (:issue:`49825`, :issue:`49851`)
 - Performance improvements to :func:`read_sas` (:issue:`47403`, :issue:`47405`, :issue:`47656`, :issue:`48502`)
 - Memory improvement in :meth:`RangeIndex.sort_values` (:issue:`48801`)
+- Performance improvement in :meth:`Series.to_numpy` if ``copy=True`` by avoiding copying twice (:issue:`24345`)
 - Performance improvement in :class:`DataFrameGroupBy` and :class:`SeriesGroupBy` when ``by`` is a categorical type and ``sort=False`` (:issue:`48976`)
 - Performance improvement in :class:`DataFrameGroupBy` and :class:`SeriesGroupBy` when ``by`` is a categorical type and ``observed=False`` (:issue:`49596`)
 - Performance improvement in :func:`read_stata` with parameter ``index_col`` set to ``None`` (the default). Now the index will be a :class:`RangeIndex` instead of :class:`Int64Index` (:issue:`49745`)
@@ -855,6 +856,7 @@ Conversion
 - Bug where any :class:`ExtensionDtype` subclass with ``kind="M"`` would be interpreted as a timezone type (:issue:`34986`)
 - Bug in :class:`.arrays.ArrowExtensionArray` that would raise ``NotImplementedError`` when passed a sequence of strings or binary (:issue:`49172`)
 - Bug in :meth:`Series.astype` raising ``pyarrow.ArrowInvalid`` when converting from a non-pyarrow string dtype to a pyarrow numeric type (:issue:`50430`)
+- Bug in :meth:`Series.to_numpy` converting to NumPy array before applying ``na_value`` (:issue:`48951`)
 - Bug in :func:`to_datetime` was not respecting ``exact`` argument when ``format`` was an ISO8601 format (:issue:`12649`)
 - Bug in :meth:`TimedeltaArray.astype` raising ``TypeError`` when converting to a pyarrow duration type (:issue:`49795`)
 -
diff --git a/pandas/core/base.py b/pandas/core/base.py
@@ -531,12 +531,19 @@ def to_numpy(
                 f"to_numpy() got an unexpected keyword argument '{bad_keys}'"
             )
 
-        result = np.asarray(self._values, dtype=dtype)
-        # TODO(GH-24345): Avoid potential double copy
-        if copy or na_value is not lib.no_default:
-            result = result.copy()
-            if na_value is not lib.no_default:
-                result[np.asanyarray(self.isna())] = na_value
+        if na_value is not lib.no_default:
+            values = self._values.copy()
+            values[np.asanyarray(self.isna())] = na_value
+        else:
+            values = self._values
+
+        result = np.asarray(values, dtype=dtype)
+
+        if copy and na_value is lib.no_default:
+            if np.shares_memory(self._values[:2], result[:2]):
+                # Take slices to improve performance of check
+                result = result.copy()
+
         return result
 
     @final
diff --git a/pandas/tests/series/methods/test_to_numpy.py b/pandas/tests/series/methods/test_to_numpy.py
@@ -0,0 +1,17 @@
+import numpy as np
+import pytest
+
+from pandas import (
+    NA,
+    Series,
+)
+import pandas._testing as tm
+
+
+@pytest.mark.parametrize("dtype", ["int64", "float64"])
+def test_to_numpy_na_value(dtype):
+    # GH#48951
+    ser = Series([1, 2, NA, 4])
+    result = ser.to_numpy(dtype=dtype, na_value=0)
+    expected = np.array([1, 2, 0, 4], dtype=dtype)
+    tm.assert_numpy_array_equal(result, expected)